Data Preparation

Set Environment and load R

## Get interactive session ##
#  srun --time=08:00:00 --mem=40G -p int --pty bash

# module purge;source /camp/stp/babs/working/software/modulepath_new_software_tree_2018-08-13;module load pandoc/2.2.3.2-foss-2016b;ml R/3.6.0-foss-2016b-BABS;R;

# sbatch --time=08:00:00 --wrap "module purge;source /camp/stp/babs/working/software/modulepath_new_software_tree_2018-08-13;module load pandoc/2.2.3.2-foss-2016b;ml R/3.6.0-foss-2016b-BABS;Rscript runB.r" --job-name="rB" --mem=42G -o rB.slurm >> commands.txt

# sbatch --time=12:00:00 --wrap "module purge;source /camp/stp/babs/working/software/modulepath_new_software_tree_2018-08-13;module load pandoc/2.2.3.2-foss-2016b;ml R/3.6.0-foss-2016b-BABS;Rscript runB.r" --job-name="rB" -p hmem --mem=300G -o rB.slurm >> commands.txt

Set Parameters

library(dplyr)
library(Seurat)
library(ggplot2)
library(tidyverse)
library(tidyr)
library(knitr)

## Setup plot collection object
VersionPdfExt <- paste0(".V", gsub("-", "", Sys.Date()), ".pdf")

if (dir.exists("/Volumes/babs/working/boeings/")){
    hpc.mount <- "/Volumes/babs/working/boeings/"
} else if (dir.exists("Y:/working/boeings/")){
    hpc.mount <- "Y:/working/boeings/"
} else if (dir.exists("/camp/stp/babs/working/boeings/")){
    hpc.mount <- "/camp/stp/babs/working/boeings/"
} else {
    hpc.mount <- ""
}

# source(
#     paste0(
#         hpc.mount,
#         "Stefan/protocol_files/github/boeings/packages/packageSourceCode/SBwebtools.pckg.r"
#     )
# )

# source(
#     paste0(
#         hpc.mount,
#         "Stefan/protocol_files/github/boeings/packages/scTools/scTools.r"
#     )
# )

source("assets/scTools.r")
source("assets/SBwebtools.pckg.r")



####

FN <- paste0(hpc.mount, "Projects/reference_data/documentation/BC.parameters.txt")
dbTable <- read.delim(
    FN, 
    sep = "\t",
    stringsAsFactors = F
)

db.pwd <- as.vector(dbTable[1,1])


if (length(.libPaths()) > 2){
    .libPaths(.libPaths()[2:3])
}

ObioFN <- paste0("../", list.files("..")[grep(".bioLOGIC.Robj", list.files(".."))])

load(ObioFN)

# checkFile = paste0(
#          Obio@parameterList$project_id,
#          ".bioLOGIC.Robj"
# )
# 
# if (ObioFN != checkFile){
#     exit()
# }


Obio <- setMountingPoint(Obio)
Obio <- setAnalysisPaths(Obio)
Obio <- setCrickGenomeAndGeneNameTable(Obio)
Obio <- createAnalysisFolders(
    Obio,
    baseDir="/camp/stp/babs/working/boeings/Projects/",
    localBaseDir = paste0(hpc.mount, "Projects/")
)
Obio <- setDataBaseParameters(Obio)
Obio <- addGeneAnnotation(Obio)

Obio@parameterList[["reportFigDir"]] <- paste0(Obio@parameterList$localWorkDir,Obio@parameterList$project_id, "/report_figures/")

## Create outputfolders ##
if (!dir.exists(paste0(Obio@parameterList$localWorkDir,Obio@parameterList$project_id))){
    dir.create(paste0(Obio@parameterList$localWorkDir,Obio@parameterList$project_id))
}

if (!dir.exists(Obio@parameterList$reportFigDir)){
    dir.create(Obio@parameterList$reportFigDir)
}

figureCount <- 1

## Load R module load R/3.5.1-foss-2018b ##
#setwd(Obio@parameterList$localWorkDir)

if (Obio@parameterList$host == "10.27.241.234"){
      urlString <- "biologic.thecrick.org"
    } else {
      urlString <- "biologic.crick.ac.uk"
    }
###############################################################################
## Compile reference gene list                                               ##
## Set gene reference list 
Obio@dataTableList[["referenceList"]] <- list()

if (is.null(Obio@parameterList$HmDisplayCatsFromDb)){
    Obio@parameterList$HmDisplayCatsFromDb <- list("TFs" = "ag_lab_categories__10")
}

## Add transcription factors that happen to be cluster markers ##
catList <- Obio@parameterList$HmDisplayCatsFromDb



if (Obio@parameterList$geneIDcolumn != "mgi_symbol" & Obio@parameterList$geneIDcolumn != "hgnc_symbol") {
    queryGS <- "hgnc_symbol" 
} else {
    queryGS <- Obio@parameterList$geneIDcolumn
}


for (i in 1:length(catList)){
    tempVec <- retrieve.gene.category.from.db(
        cat_id = catList[[i]],
        password = db.pwd,
        gene.symbol = queryGS,
        user = Obio@parameterList$db.user,
        host = Obio@parameterList$host
    )
    
    Obio@dataTableList$referenceList[[names(catList)[i]]] <- tempVec
}

## Transcription Factors in variable genes ##


#tempVec <- tempVec[tempVec %in% Obio@dataTableList$dfGeneralMarkers$gene]

Create Sample List

###############################################################################
## Create sample list filtered on MT and norm_counts_RNA                     ##
SampleList <- createNormSampleList(
    obj = Obio,
    reduce = NULL
)
## Done                                                                      ##
###############################################################################

###############################################################################
## Add doublet annotation, if present, to meta data                          ##

pos <- grep("DF_resultlist", names(Obio@dataTableList))

if (length(pos) > 0){
    sampleNames <- names(SampleList)
    for (i in 1:length(SampleList)){
        dfAdd <- Obio@dataTableList[["DF_resultlist"]][[sampleNames[i]]]
        row.names(dfAdd) <- gsub("-1", "",row.names(dfAdd))
        dfAdd <- dfAdd[row.names(dfAdd) %in% row.names(SampleList[[i]]@meta.data),]
        
        SampleList[[i]] <- addDf2seuratMetaData(
            obj = SampleList[[i]],
            dfAdd = dfAdd
        )
    }
}

## Done                                                                      ##
###############################################################################

###############################################################################
## Integrate Datasets                                                        ##
if (length(SampleList) > 1){
    if (Obio@parameterList$scIntegrationMethod == "SCT"){
        
        if (length(grep("scNintegrationFeatures", names(Obio@parameterList))) == 0){
            Obio@parameterList$scNintegrationFeatures = 3000
        }
        
        library(future)
        options(future.globals.maxSize = 7000 * 1024^2)
        plan("multiprocess", workers = 30)
        
        sample.features <- SelectIntegrationFeatures(
            object.list = SampleList, 
            nfeatures = Obio@parameterList$scNintegrationFeatures
        )
        SampleList <- PrepSCTIntegration(
            object.list = SampleList, 
            anchor.features = sample.features, 
            verbose = FALSE
        )
        
        sampleAnchors <- FindIntegrationAnchors(
            object.list = SampleList, 
            normalization.method = "SCT", 
            anchor.features = sample.features, 
            verbose = FALSE
        )
        
        OsC <- IntegrateData(
            anchorset = sampleAnchors, 
            normalization.method = "SCT", 
            verbose = FALSE
        )
        detach("package:future", unload=TRUE)
        
    } else {
    
        sampleAnchors <- FindIntegrationAnchors(
            object.list = SampleList, 
            dims = 1:30
        ) 

        OsC <- IntegrateData(
            #features.to.integrate = geneIntersectVec,
            anchorset = sampleAnchors, 
            dims = 1:30
        )
    }
    Obio@dataTableList$referenceList[["sampleAnchors"]] <- as.vector(sort(sampleAnchors@anchor.features))
} else {
    OsC <- SampleList[[1]]
}

Idents(OsC) <- factor(Idents(OsC), levels = names(Obio@sampleDetailList))

dotsize  = 1
if (nrow(OsC@meta.data) > 10000){
  dotsize  = 0.75
} else if (nrow(OsC@meta.data) > 20000){
  dotsize = 0.5
} else if (nrow(OsC@meta.data) > 50000){
  dotsize = 0.25
}

## Done integration                                                          ##
###############################################################################

###############################################################################
## Add additional annotation data                                            ##

# ###############################################################################
# ## Load extra annotation data                                                ##
# FN <- "/camp/stp/babs/working/boeings/Projects/swantonc/dhruva.biswas/335_CSL_DB_scRNA_seq_lung_tumor_microenvironment_EMTAB6653/basedata/MetaData.txt"
# 
# dfExtra <- read.delim(
#     FN,
#     header = T,
#     sep = "\t",
#     stringsAsFactors = F
# )
# 
# dfExtra <- dfExtra[dfExtra$cell %in% row.names(OsC@meta.data),]
# 
# names(dfExtra) <- gsub("^cluster$", "Article_Cluster", names(dfExtra))
# names(dfExtra) <- gsub("^CellType$", "Article_Cell_Type", names(dfExtra))
# names(dfExtra) <- gsub("^CellType$", "Article_Cell_Type", names(dfExtra))
# names(dfExtra) <- gsub("^CellType$", "Article_Cell_Type", names(dfExtra))
# 
# dfExtra[["Patient"]] <- sapply(dfExtra$Patient_piece, function(x) unlist(strsplit(x, "_"))[1])
# dfExtra$Patient <- paste0("P", dfExtra$Patient)
# dfExtra[["Region"]] <- sapply(dfExtra$Patient_piece, function(x) unlist(strsplit(x, "_"))[2])
# dfExtra$CellFromTumor <- as.character(dfExtra$CellFromTumor)
# dfExtra$CellFromTumor[dfExtra$CellFromTumor == "TRUE"] <- "Tumor"
# dfExtra$CellFromTumor[dfExtra$CellFromTumor == "FALSE"] <- "Non-Tumor"
# 
# 
# 
# row.names(dfExtra) <- dfExtra$cell
# 
# selVec <- c("CellFromTumor", "Patient", "Region")
# dfAdd <- dfExtra[,selVec]
# 
# OsC <- addDf2seuratMetaData(
#             obj = OsC,
#             dfAdd = dfAdd
#         )
# 
# ## Done extra annotation                                                     ##
# ###############################################################################

Result Figures

Quality Control Plots: Variable Features in the combined and all individual dataset

plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")

## First make variation plot for integrated samples, than for all individual samples separately
tag <- "Integrated_Samples"
DefaultAssay(OsC) <- "RNA"


OsC <- FindVariableFeatures(
    object = OsC,
    selection.method = 'vst', 
    nfeatures = 2000
)

# Identify the 10 most highly variable genes
label2000 <- paste0("integrated", "_", "top2000var")
Obio@dataTableList$referenceList[[ label2000]]<- head(
    x = VariableFeatures(object = OsC), 
    2000
)
    
label30 <- paste0("integrated", "_", "top30var")
    Obio@dataTableList$referenceList[[ label30]]<- head(
    x = VariableFeatures(object = OsC), 
    30
)


## slot for variable features OsC@assays$RNA@var.features

dfVar <- OsC@assays$RNA@meta.features
names(dfVar) <- gsub("vst.", "",names(dfVar))
dfVar[["gene"]] <- row.names(dfVar)

OsC@meta.data[["all"]] <- "all"
Idents(OsC) <- "all"
    
    
cluster.averages <- AverageExpression(
    OsC, 
    return.seurat = TRUE
)
    
Idents(OsC) <- "sampleID"
    
dfAvgExpr <- data.frame(cluster.averages[["RNA"]]@data)
dfAvgExpr[["gene"]] <- row.names(dfAvgExpr)
names(dfAvgExpr)[1] <- "Avg.Expression"

dfVar <- merge(
    dfVar, 
    dfAvgExpr, 
    by.x = "gene",
    by.y = "gene"
)

dfVar[["Type"]] <- "Standard"
dfVar[dfVar$gene %in% OsC@assays$RNA@var.features, "Type"] <- "Most Variable"

dfVar[["text"]] <- ""
dfVar[dfVar$gene %in% as.vector(Obio@dataTableList$referenceList[[label30]]), "text"] <- Obio@dataTableList$referenceList[label30]

#dotsize <- 0.5

library(ggrepel)

plotList[[tag]] <- ggplot(
    data = dfVar, 
        aes(
            x=Avg.Expression, 
            y=variance.standardized, label = text, color = Type
        )
) + geom_point( shape=16, size = dotsize
)  + xlab("Average Expression") + ylab("Variance Standarized")  +  theme(
    axis.text.y   = element_text(size=8),
    axis.text.x   = element_text(size=8),
    axis.title.y  = element_text(size=8),
    axis.title.x  = element_text(size=8),
    axis.line = element_line(colour = "black"),
    panel.border = element_rect(colour = "black", fill=NA, size=1),
    plot.title = element_text(hjust = 0.5, size = 12)
) + ggtitle(paste0("Variance vs. Expression in the Overall Experiment")
) + scale_color_manual(values=c("#FF0000", "#000000")
) + geom_text_repel()

    
    ###########################################################################
    ## Save plot to file                                                     ##
    FNbase <- paste0("variation.integrated.samples.", VersionPdfExt)
    FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        print(plotList[[tag]])
    dev.off()
    ##                                                                       ##
    ###########################################################################
    
    link <- paste0("https://",urlString,"/",Obio@parameterList$project_id,"/scatterplot?x_axis=",paste0(tag, "_AvgExpr"),"&y_axis=",paste0(tag, "_var_std"))
    

    figCap <- paste0(
        "**Figure ",
        figureCount,
        ":** Variance versus averaged gene expression for overall sample.", 
        names(SampleList)[i],
        ". ",
        "Download a pdf of this figure [here](", FNrel, "). ",
        "An interactive version of this figure can be found [here](", link, "). "
    )
    
    
   
    
    NewChnk <- paste0(
        "#### ",tag,
        "\n```{r varplot_",tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
        "\n",
        "\n print(plotList[['",tag,"']])",
        "\n cat(  '\n')",
        "\n\n\n```\n"   
    )
     
    ## Histogram Part C done                                                 ##
    ###########################################################################
    
       
    chnkVec <- c(
        chnkVec,
        NewChnk
    )
    
    figureCount <- figureCount + 1
    print(" All variation done.")

## Now the individual samples ##

xmax <- ceiling(max(dfVar$Avg.Expression))
ymax <- ceiling(max(dfVar$variance.standardized))

dfVarRes <- unique(
    dfVar[,c("gene", "Avg.Expression", "variance.standardized")]
)

names(dfVarRes) <- gsub(
    "Avg.Expression", paste0(tag, "_AvgExpr"), names(dfVarRes)
)

names(dfVarRes) <- gsub(
    "variance.standardized", paste0(tag, "_var_std"), names(dfVarRes)
)
    
for (i in 1:length(SampleList)){
    tag <- paste0("Ind_var_", names(SampleList)[i])
    
    
    DefaultAssay(SampleList[[i]]) <- "RNA"

    SampleList[[i]] <- FindVariableFeatures(object = SampleList[[i]])

    SampleList[[i]] <- FindVariableFeatures(
        object = SampleList[[i]],
        selection.method = 'vst', 
        nfeatures = 2000
    )

    # Identify the 10 most highly variable genes
    label2000 <- paste0(names(SampleList)[i], "_", "top2000var")
    Obio@dataTableList$referenceList[[ label2000]]<- head(
        x = VariableFeatures(object = SampleList[[i]]), 
        2000
    )
    
    label30 <- paste0(names(SampleList)[i], "_", "top30var")
    Obio@dataTableList$referenceList[[ label30]]<- head(
        x = VariableFeatures(object = SampleList[[i]]), 
        30
    )

    ## slot for variable features OsC@assays$RNA@var.features

    dfVar <- SampleList[[i]]@assays$RNA@meta.features
    names(dfVar) <- gsub("vst.", "",names(dfVar))
    dfVar[["gene"]] <- row.names(dfVar)

    
    cluster.averages <- AverageExpression(
        SampleList[[i]], 
        return.seurat = TRUE
    )
    
    dfAvgExpr <- data.frame(cluster.averages[["RNA"]]@data)
    dfAvgExpr[["gene"]] <- row.names(dfAvgExpr)
    names(dfAvgExpr)[1] <- "Avg.Expression"

    dfVar <- merge(
        dfVar, 
        dfAvgExpr, 
        by.x = "gene",
        by.y = "gene"
    )

    dfVar[["Type"]] <- "Standard"
    dfVar[dfVar$gene %in% SampleList[[i]]@assays$RNA@var.features, "Type"] <- "Most Variable"

    dfVar[["text"]] <- ""
    dfVar[dfVar$gene %in% as.vector(Obio@dataTableList$referenceList[[label30]]), "text"] <- Obio@dataTableList$referenceList[label30]

    # dotsize <- 0.5

    library(ggrepel)

    plotList[[tag]] <- ggplot(
        data = dfVar, 
        aes(
            x=Avg.Expression, 
            y=variance.standardized, label = text, color = Type)
            ) + geom_point( shape=16, size = dotsize
            )  + xlab("Average Expression") + ylab("Variance Standarized")  +  theme(
                axis.text.y   = element_text(size=8),
                axis.text.x   = element_text(size=8),
                axis.title.y  = element_text(size=8),
                axis.title.x  = element_text(size=8),
                axis.line = element_line(colour = "black"),
                panel.border = element_rect(colour = "black", fill=NA, size=1),
                plot.title = element_text(hjust = 0.5, size = 12)
            )+ ggtitle(paste0("Individual variance in the ", names(SampleList)[i],  " sample.")
            ) + scale_color_manual(values=c("#FF0000", "#000000")
            ) + geom_text_repel(
            ) +  xlim(0, xmax) + ylim(0, ymax)

    
    ###########################################################################
    ## Save plot to file                                                     ##
    FNbase <- paste0("Individual.var.features",names(SampleList)[i], VersionPdfExt)
    FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        print(plotList[[tag]])
    dev.off()
    ##                                                                       ##
    ###########################################################################
    
    
    link <- paste0("https://",urlString,"/",Obio@parameterList$project_id,"/scatterplot?x_axis=",paste0(tag, "_AvgExpr"),"&y_axis=",paste0(tag, "_var_std"))
    

    figCap <- paste0(
        "**Figure ",
        figureCount,
        ":** Variance versus averaged gene expression for sample ", 
        names(SampleList)[i],
        ". ",
        "Download a pdf of this figure [here](", FNrel, "). ",
        "An interactive version of this figure can be found [here](", link, "). "
    )
    
    NewChnk <- paste0(
        "#### ",tag,
        "\n```{r varplot_",tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
        "\n",
        "\n print(plotList[['",tag,"']])",
        "\n cat(  '\n')",
        "\n\n\n```\n"   
    )
     
    ## Histogram Part C done                                                 ##
    ###########################################################################
    
       
    chnkVec <- c(
        chnkVec,
        NewChnk
    )
    
    figureCount <- figureCount + 1
    print(paste0(names(SampleList)[i], " individual variation done."))
    
    ## Add to result table ##
    dfVarTemp <- unique(
    dfVar[,c("gene", "Avg.Expression", "variance.standardized")]
    )
    
    names(dfVarTemp) <- gsub(
        "Avg.Expression", paste0(tag, "_AvgExpr"), names(dfVarTemp)
    )
    
    names(dfVarTemp) <- gsub(
        "variance.standardized", paste0(tag, "_var_std"), names(dfVarTemp)
    )
    
    dfVarRes <- merge(
        dfVarRes, 
        dfVarTemp, 
        by.x = "gene",
        by.y = "gene",
        all = TRUE
    )
    dfVarRes[is.na(dfVarRes)] <- 0
    
}

## Make sure summary goes first ##
Obio@dataTableList[["dfVariation"]] <- dfVarRes
## plot list will be integrated in full figure ##
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))

Integrated_Samples

**Figure 1:** Variance versus averaged gene expression for overall sample.PD7_1. Download a pdf of this figure [here](report_figures/variation.integrated.samples..V20201005.pdf). An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/scatterplot?x_axis=Integrated_Samples_AvgExpr&y_axis=Integrated_Samples_var_std).

Figure 1: Variance versus averaged gene expression for overall sample.PD7_1. Download a pdf of this figure here. An interactive version of this figure can be found here.

Ind_var_PD7_1

**Figure 2:** Variance versus averaged gene expression for sample PD7_1. Download a pdf of this figure [here](report_figures/Individual.var.featuresPD7_1.V20201005.pdf). An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/scatterplot?x_axis=Ind_var_PD7_1_AvgExpr&y_axis=Ind_var_PD7_1_var_std).

Figure 2: Variance versus averaged gene expression for sample PD7_1. Download a pdf of this figure here. An interactive version of this figure can be found here.

Ind_var_PD4_2

**Figure 3:** Variance versus averaged gene expression for sample PD4_2. Download a pdf of this figure [here](report_figures/Individual.var.featuresPD4_2.V20201005.pdf). An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/scatterplot?x_axis=Ind_var_PD4_2_AvgExpr&y_axis=Ind_var_PD4_2_var_std).

Figure 3: Variance versus averaged gene expression for sample PD4_2. Download a pdf of this figure here. An interactive version of this figure can be found here.

Ind_var_E18_5_1

**Figure 4:** Variance versus averaged gene expression for sample E18_5_1. Download a pdf of this figure [here](report_figures/Individual.var.featuresE18_5_1.V20201005.pdf). An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/scatterplot?x_axis=Ind_var_E18_5_1_AvgExpr&y_axis=Ind_var_E18_5_1_var_std).

Figure 4: Variance versus averaged gene expression for sample E18_5_1. Download a pdf of this figure here. An interactive version of this figure can be found here.

###############################################################################
## Perform integrated analysis                                               ##

if (length(Obio@sampleDetailList) > 1){
    DefaultAssay(OsC) <- "integrated"
} else {
    Obio@parameterList$singleCellClusterString <- gsub("integrated", "RNA", Obio@parameterList$singleCellClusterString)
}


# Run the standard workflow for visualization and clustering
## This will scale on the most variable features only
OsC <- ScaleData(OsC, verbose = FALSE)

OsC <- RunPCA(
    OsC, 
    npcs = Obio@parameterList$singleCellSeuratNpcs4PCA, verbose = FALSE
)
# t-SNE and Clustering

## Add PCA clusters to data collection ##



OsC <- RunUMAP(OsC, reduction = "pca", dims = 1:20)

OsC <- RunTSNE(OsC, reduction = "pca", dims = 1:20)

OsC <- FindNeighbors(OsC, reduction = "pca", dims = 1:20)

OsC <- FindClusters(OsC, resolution = Obio@parameterList$singleCellClusterParameter)

## Rational: Run PCA on variable features, then scale data for heatmaps and other applications

if (Obio@parameterList$scIntegrationMethod != "RNA"){
DefaultAssay(OsC) <- Obio@parameterList$scIntegrationMethod
    allGenes <- rownames(x = OsC@assays[[Obio@parameterList$scIntegrationMethod]])
    OsC <- ScaleData(OsC, verbose = FALSE, features=allGenes)
}

DefaultAssay(OsC) <- "RNA"
allGenes <- rownames(x = OsC@assays$RNA)
OsC <- ScaleData(OsC, verbose = FALSE, features=allGenes)

Dimensionality Reduction Plots by Sample

If you wish to get a bit of background on tSNE dimensionality reduction, take a look at this youtube video by Josh Starmer from the University of North Carolina.

If you wish to get a bit of background on UMAP (and other) dimensionality reduction algorithms, take a look at this youtube video recaping a lecture at the PyData 2018 conference.

reductionVec <- c("umap", "tsne")

plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")

## Merge UMAP Coordinates and cluster assignment ##

for (i in 1:length(reductionVec)){
    tag <- paste0("Dimplot_by_sample_", i)
    
    plotList[[tag]] <- DimPlot(OsC, reduction = reductionVec[i], group.by = "sampleID")
    
    
    ## Save to file ##
    FNbase <- paste0("dimplot.by.sample.", reductionVec[i],".", VersionPdfExt)
    FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        print(plotList[[tag]])
    dev.off()
    
    if (reductionVec[i] == "tsne"){
    link <- paste0("https://",urlString,"/",Obio@parameterList$project_id,"/pca?x_axis=tSNE_1&y_axis=tSNE_2")
    } else {
    link <- paste0("https://",urlString,"/",Obio@parameterList$project_id,"/pca?x_axis=UMAP_1&y_axis=UMAP_2")  
    }
    
    figLegend <- paste0(
        "**Figure ", 
        figureCount, 
        ":** ",
        reductionVec[i],
        " plot depicting all samples. Download a pdf of this figure [here](", FNrel,").",
        "An interactive version of this figure can be found [here](", link, "). "
    )
    
    NewChnk <- paste0(
        "#### ",reductionVec[i],
        "\n```{r Dimplot_by_sample_",
        i,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
        figLegend,"'}\n",
        "\n",
        "\n print(plotList[['",tag,"']])",
        "\n cat(  '\n')",
        "\n\n\n```\n"   
    )
    
    chnkVec <- c(
        chnkVec,
        NewChnk
    )
}

## Done integraed analysis                                                   ##
###############################################################################
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))

umap

**Figure 5:** umap plot depicting all samples. Download a pdf of this figure [here](report_figures/dimplot.by.sample.umap..V20201005.pdf).An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/pca?x_axis=UMAP_1&y_axis=UMAP_2).

Figure 5: umap plot depicting all samples. Download a pdf of this figure here.An interactive version of this figure can be found here.

tsne

**Figure 5:** tsne plot depicting all samples. Download a pdf of this figure [here](report_figures/dimplot.by.sample.tsne..V20201005.pdf).An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/pca?x_axis=tSNE_1&y_axis=tSNE_2).

Figure 5: tsne plot depicting all samples. Download a pdf of this figure here.An interactive version of this figure can be found here.

Dimensionality Reduction Plots by Cluster

reductionVec <- c("umap", "tsne")

plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")


###############################################################################
## First UMAP all samples together                                           ##
tag <- paste0("UMAP_All_Samples")
dfPlot <- OsC@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
  dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
dfPlot$UMAP_1 <- NULL
dfPlot$UMAP_2 <- NULL
            
## Get UMAP coordinates ##
coord <- data.frame(OsC@reductions$umap@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
            
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$UMAP_1 != 0 & dfPlot$UMAP_2 != 0,]
            
            
## Add cluster colors ##
dfPlot[["Cluster"]] <- paste0("C", dfPlot$seurat_clusters)
clusterVec <- as.vector(paste0("C", unique(sort(dfPlot$seurat_clusters))))

maxX <- 1.1*max(dfPlot$UMAP_1, na.rm = T)
minX <- 1.1*min(dfPlot$UMAP_1, na.rm = T)
maxY <- 1.1*max(dfPlot$UMAP_2, na.rm = T)
minY <- 1.1*min(dfPlot$UMAP_2, na.rm = T)            

library(scales)
clusterCols = hue_pal()(length(clusterVec))
dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)

# dotsize  = 1
# if (nrow(dfPlot) > 10000){
#   dotsize  = 0.75
# } else if (nrow(dfPlot) > 20000){
#   dotsize = 0.5
# } else if (nrow(dfPlot) > 50000){
#   dotsize = 0.25
# }

plotList[[tag]] <- ggplot(data=dfPlot[dfPlot$included == "+",], aes(UMAP_1, UMAP_2, color=Cluster)
            ) + geom_point( shape=16, size = as.numeric(dotsize)
            ) + xlab("UMAP1") + ylab("UMAP2")  +  theme(
                axis.text.y   = element_text(size=8),
                axis.text.x   = element_text(size=8),
                axis.title.y  = element_text(size=8),
                axis.title.x  = element_text(size=8),
                axis.line = element_line(colour = "black"),
                panel.border = element_rect(colour = "black", fill=NA, size=1),
                plot.title = element_text(hjust = 0.5, size = 12),
                legend.title = element_blank()
            ) + ggtitle(paste0("Sample: ", tag)
            ) + xlim(minX, maxX) + ylim(minY, maxY
            ) + coord_fixed(ratio=1
            ) + theme_bw()
            
if (length(unique(dfPlot$Cluster)) > 15){
   plotList[[tag]] <- plotList[[tag]] + theme(legend.position = "none")
}
            FNbase <- paste0(tag, VersionPdfExt)
            FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
            FNrel <- paste0("report_figures/", FNbase)
            
            pdf(FN)
                print(plotList[[tag]])
            dev.off()
            
            link <- paste0("https://",urlString,"/",Obio@parameterList$project_id,"/pca?x_axis=UMAP_1&y_axis=UMAP_2")  
            
            figLegend <- paste0(
                "**Figure ", 
                figureCount, 
                ":** ",
                " UMAP showing all cells from all samples together. Download a pdf of this figure [here](", FNrel,").",
                "An interactive version of this figure can be found [here](", link, "). "
            )
            
            figureCount <- figureCount + 1
            
            NewChnk <- paste0(
                "#### ", tag,
                "\n```{r SL_UMAP_",
                tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
                figLegend,"'}\n",
                "\n",
                "\n print(plotList[['",tag,"']])",
                "\n cat(  '\n')",
                "\n\n\n```\n"   
            )
            
            chnkVec <- c(
                chnkVec,
                NewChnk
            )


## Done first umap all samples                                               ##
###############################################################################
            
###############################################################################
## First tsne all samples together                                           ##
tag <- paste0("tSNE_All_Samples")
dfPlot <- OsC@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
  dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
dfPlot$tSNE_1 <- NULL
dfPlot$tSNE_2 <- NULL
            
## Get tsNE coordinates ##
coord <- data.frame(OsC@reductions$tsne@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
            
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$tSNE_1 != 0 & dfPlot$tSNE_2 != 0,]
            
            
## Add cluster colors ##
dfPlot[["Cluster"]] <- paste0("C", dfPlot$seurat_clusters)
clusterVec <- as.vector(paste0("C", unique(sort(dfPlot$seurat_clusters))))

maxX <- 1.1*max(dfPlot$tSNE_1, na.rm = T)
minX <- 1.1*min(dfPlot$tSNE_1, na.rm = T)
maxY <- 1.1*max(dfPlot$tSNE_2, na.rm = T)
minY <- 1.1*min(dfPlot$tSNE_2, na.rm = T)            

library(scales)
clusterCols = hue_pal()(length(clusterVec))
dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)

# dotsize  = 1.5
# if (nrow(dfPlot) > 10000){
#   dotsize  = 0.75
# } else if (nrow(dfPlot) > 50000){
#   dotsize = 0.5
# } else {
#   dotsize = 0.25
# }

plotList[[tag]] <- ggplot(data=dfPlot[dfPlot$included == "+",], aes(tSNE_1, tSNE_2, color=Cluster)
            ) + geom_point( shape=16, size = as.numeric(dotsize)
            ) + xlab("tSNE1") + ylab("tSNE2")  +  theme(
                axis.text.y   = element_text(size=8),
                axis.text.x   = element_text(size=8),
                axis.title.y  = element_text(size=8),
                axis.title.x  = element_text(size=8),
                axis.line = element_line(colour = "black"),
                panel.border = element_rect(colour = "black", fill=NA, size=1),
                plot.title = element_text(hjust = 0.5, size = 12),
                legend.title = element_blank()
            ) + ggtitle(paste0("Sample: ", tag)
            ) + xlim(minX, maxX) + ylim(minY, maxY
            ) + coord_fixed(ratio=1
            ) + theme_bw() 
            
if (length(unique(dfPlot$Cluster)) > 15){
   plotList[[tag]] <- plotList[[tag]] + theme(legend.position = "none")
}
            FNbase <- paste0(tag, VersionPdfExt)
            FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
            FNrel <- paste0("report_figures/", FNbase)
            
            pdf(FN)
                print(plotList[[tag]])
            dev.off()
            
            link <- paste0("https://",urlString,"/",Obio@parameterList$project_id,"/pca?x_axis=tSNE_1&y_axis=tSNE_2")  
            
            figLegend <- paste0(
                "**Figure ", 
                figureCount, 
                ":** ",
                " tSNE showing all cells from all samples together. Download a pdf of this figure [here](", FNrel,").",
                "An interactive version of this figure can be found [here](", link, "). "
            )
            
            figureCount <- figureCount + 1
            
            NewChnk <- paste0(
                "#### ", tag,
                "\n```{r SL_tSNE_",
                tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
                figLegend,"'}\n",
                "\n",
                "\n print(plotList[['",tag,"']])",
                "\n cat(  '\n')",
                "\n\n\n```\n"   
            )
            
            chnkVec <- c(
                chnkVec,
                NewChnk
            )


## Done first tsne all samples                                               ##
###############################################################################            
###############################################################################
## Make one UMAP plot per sample                                             ##

sampleVec <- sort(unique(OsC@meta.data$sampleID))

dfPlot <- OsC@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
  dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
            
## Get UMAP coordinates ##
coord <- data.frame(OsC@reductions$umap@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot$UMAP_1 <- NULL
dfPlot$UMAP_2 <- NULL
            
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$UMAP_1 != 0 & dfPlot$UMAP_2 != 0,]
            
            
## Add cluster colors ##
dfPlot[["Cluster"]] <- paste0("C", dfPlot$seurat_clusters)
clusterVec <- as.vector(paste0("C", unique(sort(dfPlot$seurat_clusters))))
            
library(scales)
clusterCols = hue_pal()(length(clusterVec))
            
dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)            
            
maxX <- 1.1*max(dfPlot$UMAP_1, na.rm = T)
minX <- 1.1*min(dfPlot$UMAP_1, na.rm = T)
maxY <- 1.1*max(dfPlot$UMAP_2, na.rm = T)
minY <- 1.1*min(dfPlot$UMAP_2, na.rm = T)               
                                   
for (i in 1:length(sampleVec)){
    tag <- paste0("UMAP_plot_by_", sampleVec[i])
    
    dfPlotSel <- dfPlot[dfPlot$sampleID == sampleVec[i], ]
            
            
            
    plotList[[tag]] <- ggplot(data=dfPlotSel[dfPlot$included == "+",], aes(UMAP_1, UMAP_2, color=Cluster)
            ) + geom_point( shape=16, size = as.numeric(dotsize)
            ) + xlab("UMAP1") + ylab("UMAP2")  +  theme(
                axis.text.y   = element_text(size=8),
                axis.text.x   = element_text(size=8),
                axis.title.y  = element_text(size=8),
                axis.title.x  = element_text(size=8),
                axis.line = element_line(colour = "black"),
                panel.border = element_rect(colour = "black", fill=NA, size=1),
                plot.title = element_text(hjust = 0.5, size = 12),
                legend.title = element_blank()
            ) + ggtitle(paste0("Sample: ", tag)
            ) + xlim(minX, maxX) + ylim(minY, maxY
            ) + coord_fixed(ratio=1
            ) + theme_bw() 
    
    if (length(unique(dfPlot$Cluster)) > 15){
   plotList[[tag]] <- plotList[[tag]] + theme(legend.position = "none")
}
            
            FNbase <- paste0(tag, VersionPdfExt)
            FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
            FNrel <- paste0("report_figures/", FNbase)
            
            pdf(FN)
                print(plotList[[tag]])
            dev.off()
            
            figLegend <- paste0(
                "**Figure ", 
                figureCount, 
                ":** ",
                " Sample-level UMAPs. Download a pdf of this figure [here](", FNrel,")."
            )
            
            figureCount <- figureCount + 1
            
            NewChnk <- paste0(
                paste("#### ", tag),
                "\n```{r SL_UMAP_",
                tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
                figLegend,"'}\n",
                "\n",
                "\n print(plotList[['",tag,"']])",
                "\n cat(  '\n')",
                "\n\n\n```\n"   
            )
            
            chnkVec <- c(
                chnkVec,
                NewChnk
            )
            
            
    
    
    
    
}

## Done making one umap plot per sample                                      ##
###############################################################################


###############################################################################
## Add cluster dendrogram by sample                                          ##

if (length(unique(OsC@meta.data$sampleID)) > 3){
  library(ggtree)
  Idents(OsC) <- "sampleID"
  OsC <- BuildClusterTree(OsC)
      
  tag <- paste0("Sample_Dendrogram")
      
  OsC@tools$BuildClusterTree$tip.label <- paste0("C", OsC@tools$BuildClusterTree$tip.label)
      
  plotList[[tag]]  <- ggplot(OsC@tools$BuildClusterTree
      ) + geom_tree(
      ) + theme_tree(
      ) + geom_tiplab(
      ) + labs(title=tag
      ) + theme(
        panel.border = element_rect(colour = "black", fill=NA, size=1),
        axis.title.x=element_blank(),
        plot.title = element_text(hjust = 0.5, size = 12)
      )  + xlim(0,1.2*max(OsC@tools$BuildClusterTree[[2]]))
 
      
  ## Save to file ##
  FNbase <- paste0(tag,".", VersionPdfExt)
  FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
  FNrel <- paste0("report_figures/", FNbase)
      
  pdf(FN)
      print(plotList[[tag]])
  dev.off()
      
  figLegend <- paste0(
      "**Figure ", 
          figureCount, 
          ":** ",
          " Clusterplot dendrogram by sample ID. ","A pdf of this figure can be downloaded [here](",FNrel,")."
      )
      
      
      NewChnk <- paste0(
          "#### SampleID Dendrogram",
          "\n```{r ", tag, "results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
          figLegend,"'}\n",
          "\n",
          "\n print(plotList[['",tag,"']])",
          "\n cat(  '\n')",
          "\n\n\n```\n"   
      )
      
      chnkVec <- c(
          chnkVec,
          NewChnk
      )
      
      
      figureCount <- figureCount + 1
}

## Done by sample                                                            ##
###############################################################################
            
###############################################################################
## Add cluster dendrogram by cluster                                         ##
library(ggtree)
Idents(OsC) <- "seurat_clusters"
OsC <- BuildClusterTree(OsC)
    
tag <- paste0("Cluster_Dendrogram")
    
OsC@tools$BuildClusterTree$tip.label <- paste0("C", OsC@tools$BuildClusterTree$tip.label)
    
plotList[[tag]]  <- ggplot(OsC@tools$BuildClusterTree
    ) + geom_tree(
    ) + theme_tree(
    ) + geom_tiplab(
    ) + labs(title=tag
    ) + theme(
      panel.border = element_rect(colour = "black", fill=NA, size=1),
      axis.title.x=element_blank(),
      plot.title = element_text(hjust = 0.5, size = 12)
    )  + xlim(0,1.2*max(OsC@tools$BuildClusterTree[[2]]))
    
    #+ xlim(-1,1.2*max(OsC@tools$BuildClusterTree$edge)) 
    
    
## Save to file ##
FNbase <- paste0(tag,".", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
    
pdf(FN)
    print(plotList[[tag]])
dev.off()
    
figLegend <- paste0(
    "**Figure ", 
        figureCount, 
        ":** ",
        " Clusterplot dendrogram. ","A pdf of this figure can be downloaded [here](",FNrel,")."
    )
    
    
    NewChnk <- paste0(
        "#### Cluster Dendrogram",
        "\n```{r ", tag, "results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
        figLegend,"'}\n",
        "\n",
        "\n print(plotList[['",tag,"']])",
        "\n cat(  '\n')",
        "\n\n\n```\n"   
    )
    
    chnkVec <- c(
        chnkVec,
        NewChnk
    )
    
    
    figureCount <- figureCount + 1

## Done integraed analysis                                                   ##
###############################################################################


###############################################################################
## Find all markers                                                          ##
DefaultAssay(OsC) <- "RNA"
Idents(OsC) <- Obio@parameterList$singleCellClusterString

lgFCthreshold <- 0.25

dfGeneralMarkers <- FindAllMarkers(
    object = OsC, 
    only.pos = FALSE, 
    min.pct = 0.1, 
    logfc.threshold = lgFCthreshold,
    test.use = "roc",
    assay =  "RNA",
    slot = "data"
)

if (nrow(dfGeneralMarkers) == 0){
    lgFCthreshold <- 0.05
dfGeneralMarkers <- FindAllMarkers(
    object = OsC, 
    only.pos = FALSE, 
    min.pct = 0.1, 
    logfc.threshold = lgFCthreshold,
    test.use = "roc",
    assay =  "RNA",
    slot = "data"
)
}

if (nrow(dfGeneralMarkers) > 0){
    dfGeneralMarkers[["direction"]] <- ""
    dfGeneralMarkers[dfGeneralMarkers$avg_diff >= 0, "direction"] <- "positive"
    dfGeneralMarkers[dfGeneralMarkers$avg_diff < 0, "direction"] <- "negative"
    
    Obio@dataTableList[["dfGeneralMarkers"]] <- dfGeneralMarkers
    
    dfGeneralMarkersFilt <- dfGeneralMarkers[dfGeneralMarkers$avg_diff > lgFCthreshold | dfGeneralMarkers$avg_diff < -lgFCthreshold,]
    Obio@dataTableList[["dfGeneralMarkersFilt"]] <- dfGeneralMarkersFilt
    
    dfTop1 <- data.frame(dfGeneralMarkers %>% group_by(cluster) %>% top_n(1, avg_diff))
    dfTop5 <- data.frame(dfGeneralMarkers %>% group_by(cluster) %>% top_n(5, avg_diff))
    dfTop10 <- data.frame(dfGeneralMarkers %>% group_by(cluster) %>% top_n(10, avg_diff))
    
    Obio@dataTableList[["dfGeneralMarkersTop10"]] <- dfTop10
    
    Obio@dataTableList$referenceList[["Top10clusterMarkers"]] <- as.vector(
        unique(
            dfTop10$gene
        )
    )
} else {
    Obio@dataTableList[["dfGeneralMarkers"]] <- NULL
}

#############################################################
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))

UMAP_All_Samples

**Figure 5:**  UMAP showing all cells from all samples together. Download a pdf of this figure [here](report_figures/UMAP_All_Samples.V20201005.pdf).An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/pca?x_axis=UMAP_1&y_axis=UMAP_2).

Figure 5: UMAP showing all cells from all samples together. Download a pdf of this figure here.An interactive version of this figure can be found here.

tSNE_All_Samples

**Figure 6:**  tSNE showing all cells from all samples together. Download a pdf of this figure [here](report_figures/tSNE_All_Samples.V20201005.pdf).An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/pca?x_axis=tSNE_1&y_axis=tSNE_2).

Figure 6: tSNE showing all cells from all samples together. Download a pdf of this figure here.An interactive version of this figure can be found here.

UMAP_plot_by_E18_5_1

**Figure 7:**  Sample-level UMAPs. Download a pdf of this figure [here](report_figures/UMAP_plot_by_E18_5_1.V20201005.pdf).

Figure 7: Sample-level UMAPs. Download a pdf of this figure here.

UMAP_plot_by_PD4_2

**Figure 8:**  Sample-level UMAPs. Download a pdf of this figure [here](report_figures/UMAP_plot_by_PD4_2.V20201005.pdf).

Figure 8: Sample-level UMAPs. Download a pdf of this figure here.

UMAP_plot_by_PD7_1

**Figure 9:**  Sample-level UMAPs. Download a pdf of this figure [here](report_figures/UMAP_plot_by_PD7_1.V20201005.pdf).

Figure 9: Sample-level UMAPs. Download a pdf of this figure here.

Cluster Dendrogram

**Figure 10:**  Clusterplot dendrogram. A pdf of this figure can be downloaded [here](report_figures/Cluster_Dendrogram..V20201005.pdf).

Figure 10: Clusterplot dendrogram. A pdf of this figure can be downloaded here.

Check Accuracy UMAP Distances

The sleepwalk tool will provide the euclidean distances between individual cells. This will help you to determine clustering accuracy. Find more information here.
library(sleepwalk)
sleepwalk( 
  OsC@reductions$umap@cell.embeddings, 
  OsC@reductions$pca@cell.embeddings,
  saveToFile=paste(Obio@parameterList$outputDir,"sleepwalk.UMAP.html",sep='')
)
htmltools::includeHTML(paste(Obio@parameterList$outputDir,"sleepwalk.UMAP.html",sep=''))
Sleepwalk

Highlight Potential Doublets

This analyis gives an indication of which cells MIGHT be doublets. Chris McGinnis DoubletFinder package is used to identify potential doublets.

## Add UMAP coordinates to Metadata ##
dfAdd <- data.frame(OsC@reductions$umap@cell.embeddings)

OsC <- addDf2seuratMetaData(
    obj = OsC, 
    dfAdd = dfAdd
)

## Add tSNE coordinates to Metadata ##
dfAdd <- data.frame(OsC@reductions$tsne@cell.embeddings)

OsC <- addDf2seuratMetaData(
    obj = OsC, 
    dfAdd = dfAdd
)


plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")
dfTemp <- OsC@meta.data

pos <- grep("Doublet_plot", names(dfTemp))

if (length(pos) > 0){

  ## First make variation plot for integrated samples, than for all individual samples separately
  tag <- "Doublet_plot"
  
  dfTemp$DF_Classification <- factor(dfTemp$DF_Classification, levels = sort(unique(dfTemp$DF_Classification)))
  
  
  #dotsize <- round(7500/nrow(dfTemp),1)
  # dotsize <- 0.3
  
  plotList[[tag]] <- ggplot(dfTemp, aes(UMAP_1, UMAP_2, color=DF_Classification)
          )+ geom_point( 
              shape = 16,
              size = as.numeric(dotsize)
          ) + xlab("UMAP1") + ylab("UMAP2")  +  theme(
              axis.text.y   = element_text(size=8),
              axis.text.x   = element_text(size=8),
              axis.title.y  = element_text(size=8),
              axis.title.x  = element_text(size=8),
              axis.line = element_line(colour = "black"),
              panel.border = element_rect(colour = "black", fill=NA, size=1),
              plot.title = element_text(hjust = 0.5, size = 12),
              panel.background = element_rect(fill = "lightgrey")
          ) + ggtitle("Potential Doublets"
          ) + scale_color_manual(values=c("#000000","#FF0000")
          ) + theme_bw()
  
  #+ xlim(minX, maxX) + ylim(minY, maxY)  
      
      
      
              ## Save to file ##
              FNbase <- paste0("DoubletFinderAll", VersionPdfExt)
              FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
              FNrel <- paste0("report_figures/", FNbase)
              
             
              pdf(FN)
              print(plotList[[tag]])
              dev.off()
              
              
              
              ## Create R markdown chunk ##
              figLegend <- paste0(
                  "**Figure ", 
                  figureCount, 
                  "**: Figure depicting the location of potential doublets in PCA components 1 and 2. Download a pdf of this figure [here](", FNrel, "). "
              )
              
              figureCount <- figureCount + 1 
              
              NewChnk <- paste0(
                  " #### Doublets All Timepoints", 
                  "\n```{r ", tag, ", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
                  figLegend,"'}\n",
                  "\n",
                  "\n print(plotList[['",tag,"']])",
                  "\n cat(  '\n')",
                  "\n\n\n```\n"   
              )
              
              chnkVec <- c(
                  chnkVec,
                  NewChnk
              )
}
## plot list will be integrated in full figure ##
if (length(pos) > 0){
  cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
}

Cell Cycle Markers

###############################################################################
## Estimate cell cycle genes                                                 ##
exprFN <- paste0(hpc.mount, "Projects/reference_data/cell_cycle_vignette_files/nestorawa_forcellcycle_expressionMatrix.txt")

exp.mat <- read.table(file = exprFN, header = TRUE, 
                      as.is = TRUE, row.names = 1)


# A list of cell cycle markers, from Tirosh et al, 2015, is loaded with Seurat.  We can
# segregate this list into markers of G2/M phase and markers of S phase
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes

print(paste0("Used as S-phase marker genes: ", sort(unique(paste(s.genes, collapse = ", ")))))
print(paste0("Used as G2M-phase marker genes: ", sort(unique(paste(g2m.genes, collapse = ", ")))))

# Create our Seurat object and complete the initalization steps
OsC <- CellCycleScoring(OsC, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)

reductionVec <- c("umap", "tsne")
plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")

###############################################################################
## First UMAP all samples together                                           ##
tag <- paste0("CellCyclePhase_All_Samples")
dfPlot <- OsC@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
  dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
dfPlot$UMAP_1 <- NULL
dfPlot$UMAP_2 <- NULL
            
## Get UMAP coordinates ##
coord <- data.frame(OsC@reductions$umap@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
            
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$UMAP_1 != 0 & dfPlot$UMAP_2 != 0,]
            
            
## Add cluster colors ##
#dfPlot[["Cluster"]] <- paste0("C", dfPlot$seurat_clusters)
#clusterVec <- as.vector(paste0("C", unique(sort(dfPlot$seurat_clusters))))

maxX <- 1.1*max(dfPlot$UMAP_1, na.rm = T)
minX <- 1.1*min(dfPlot$UMAP_1, na.rm = T)
maxY <- 1.1*max(dfPlot$UMAP_2, na.rm = T)
minY <- 1.1*min(dfPlot$UMAP_2, na.rm = T)            

# library(scales)
# clusterCols = hue_pal()(length(clusterVec))
# dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)

# dotsize  = 1
# if (nrow(dfPlot) > 10000){
#   dotsize  = 0.75
# } else if (nrow(dfPlot) > 20000){
#   dotsize = 0.5
# } else if (nrow(dfPlot) > 50000){
#   dotsize = 0.25
# }

plotList[[tag]] <- ggplot(data=dfPlot[dfPlot$included == "+",], aes(UMAP_1, UMAP_2, color=Phase)
            ) + geom_point( shape=16, size = as.numeric(dotsize)
            ) + xlab("UMAP1") + ylab("UMAP2")  +  theme(
                axis.text.y   = element_text(size=8),
                axis.text.x   = element_text(size=8),
                axis.title.y  = element_text(size=8),
                axis.title.x  = element_text(size=8),
                axis.line = element_line(colour = "black"),
                panel.border = element_rect(colour = "black", fill=NA, size=1),
                plot.title = element_text(hjust = 0.5, size = 12),
                legend.title = element_blank()
            ) + ggtitle(paste0("Sample: ", tag)
            ) + xlim(minX, maxX) + ylim(minY, maxY
            ) + coord_fixed(ratio=1
            ) + theme_bw() 
            
if (length(unique(dfPlot$Cluster)) > 15){
   plotList[[tag]] <- plotList[[tag]] + theme(legend.position = "none")
}
            FNbase <- paste0(tag, VersionPdfExt)
            FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
            FNrel <- paste0("report_figures/", FNbase)
            
            pdf(FN)
                print(plotList[[tag]])
            dev.off()
            
            figLegend <- paste0(
                "**Figure ", 
                figureCount, 
                ":** ",
                " UMAP showing all cells from all samples together with the estimated cell-cycle phase color-coded. Download a pdf of this figure [here](", FNrel,")."
            )
            
            figureCount <- figureCount + 1
            
            NewChnk <- paste0(
                "#### ", tag,
                "\n```{r CC_UMAP_",
                tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
                figLegend,"'}\n",
                "\n",
                "\n print(plotList[['",tag,"']])",
                "\n cat(  '\n')",
                "\n\n\n```\n"   
            )
            
            chnkVec <- c(
                chnkVec,
                NewChnk
            )


## Done first umap all samples                                               ##
###############################################################################
            
###############################################################################
## First tsne all samples together                                           ##
tag <- paste0("tSNE_All_Samples")
dfPlot <- OsC@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
  dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
dfPlot$tSNE_1 <- NULL
dfPlot$tSNE_2 <- NULL
            
## Get tsNE coordinates ##
coord <- data.frame(OsC@reductions$tsne@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
            
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$tSNE_1 != 0 & dfPlot$tSNE_2 != 0,]
            
            
## Add cluster colors ##
#dfPlot[["Cluster"]] <- paste0("C", dfPlot$seurat_clusters)
#clusterVec <- as.vector(paste0("C", unique(sort(dfPlot$seurat_clusters))))

maxX <- 1.1*max(dfPlot$tSNE_1, na.rm = T)
minX <- 1.1*min(dfPlot$tSNE_1, na.rm = T)
maxY <- 1.1*max(dfPlot$tSNE_2, na.rm = T)
minY <- 1.1*min(dfPlot$tSNE_2, na.rm = T)            

#library(scales)
#clusterCols = hue_pal()(length(clusterVec))
#dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)

# dotsize  = 1.5
# if (nrow(dfPlot) > 10000){
#   dotsize  = 0.75
# } else if (nrow(dfPlot) > 50000){
#   dotsize = 0.5
# } else {
#   dotsize = 0.25
# }

plotList[[tag]] <- ggplot(data=dfPlot[dfPlot$included == "+",], aes(tSNE_1, tSNE_2, color=Phase)
            ) + geom_point( shape=16, size = as.numeric(dotsize)
            ) + xlab("tSNE1") + ylab("tSNE2")  +  theme(
                axis.text.y   = element_text(size=8),
                axis.text.x   = element_text(size=8),
                axis.title.y  = element_text(size=8),
                axis.title.x  = element_text(size=8),
                axis.line = element_line(colour = "black"),
                panel.border = element_rect(colour = "black", fill=NA, size=1),
                plot.title = element_text(hjust = 0.5, size = 12),
                legend.title = element_blank()
            ) + ggtitle(paste0("Sample: ", tag)
            ) + xlim(minX, maxX) + ylim(minY, maxY
            ) + coord_fixed(ratio=1
            ) + theme_bw() 
            
if (length(unique(dfPlot$Cluster)) > 15){
   plotList[[tag]] <- plotList[[tag]] + theme(legend.position = "none")
}
            FNbase <- paste0(tag, VersionPdfExt)
            FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
            FNrel <- paste0("report_figures/", FNbase)
            
            pdf(FN)
                print(plotList[[tag]])
            dev.off()
            
            figLegend <- paste0(
                "**Figure ", 
                figureCount, 
                ":** ",
                " tSNE showing all cells from all samples together. The esimated cell-cycle phase is color coded. Download a pdf of this figure [here](", FNrel,")."
            )
            
            figureCount <- figureCount + 1
            
            NewChnk <- paste0(
                "#### ", tag,
                "\n```{r CC_tSNE_",
                tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
                figLegend,"'}\n",
                "\n",
                "\n print(plotList[['",tag,"']])",
                "\n cat(  '\n')",
                "\n\n\n```\n"   
            )
            
            chnkVec <- c(
                chnkVec,
                NewChnk
            )


## Done first tsne all samples                                               ##
###############################################################################            
###############################################################################
## Make one UMAP plot per sample                                             ##

sampleVec <- sort(unique(OsC@meta.data$sampleID))

dfPlot <- OsC@meta.data
pos <- grep("included", names(dfPlot))
if (length(pos) == 0){
  dfPlot[["included"]] <- "+"
}
dfPlot[["cellID"]] <- row.names(dfPlot)
            
## Get UMAP coordinates ##
coord <- data.frame(OsC@reductions$umap@cell.embeddings)
coord[["cellID"]] <- row.names(coord)
coord <-coord[coord$cellID %in% dfPlot$cellID, ]
dfPlot$UMAP_1 <- NULL
dfPlot$UMAP_2 <- NULL
            
dfPlot <- merge(dfPlot, coord, by.x = "cellID", by.y="cellID", all=T)
dfPlot[is.na(dfPlot)] <- 0
dfPlot <- dfPlot[dfPlot$UMAP_1 != 0 & dfPlot$UMAP_2 != 0,]
            
            
## Add cluster colors ##
#dfPlot[["Cluster"]] <- paste0("C", dfPlot$seurat_clusters)
#clusterVec <- as.vector(paste0("C", unique(sort(dfPlot$seurat_clusters))))
            
#library(scales)
#clusterCols = hue_pal()(length(clusterVec))
            
#dfPlot$Cluster <- factor(dfPlot$Cluster, levels = clusterVec)            
            
maxX <- 1.1*max(dfPlot$UMAP_1, na.rm = T)
minX <- 1.1*min(dfPlot$UMAP_1, na.rm = T)
maxY <- 1.1*max(dfPlot$UMAP_2, na.rm = T)
minY <- 1.1*min(dfPlot$UMAP_2, na.rm = T)               
                                   
for (i in 1:length(sampleVec)){
    tag <- paste0("UMAP_CC_plot_by_", sampleVec[i])
    
    dfPlotSel <- dfPlot[dfPlot$sampleID == sampleVec[i], ]
            
            
            
    plotList[[tag]] <- ggplot(data=dfPlotSel[dfPlot$included == "+",], aes(UMAP_1, UMAP_2, color=Phase)
            ) + geom_point( shape=16, size = as.numeric(dotsize)
            ) + xlab("UMAP1") + ylab("UMAP2")  +  theme(
                axis.text.y   = element_text(size=8),
                axis.text.x   = element_text(size=8),
                axis.title.y  = element_text(size=8),
                axis.title.x  = element_text(size=8),
                axis.line = element_line(colour = "black"),
                panel.border = element_rect(colour = "black", fill=NA, size=1),
                plot.title = element_text(hjust = 0.5, size = 12),
                legend.title = element_blank()
            ) + ggtitle(paste0("Sample: ", tag)
            ) + xlim(minX, maxX) + ylim(minY, maxY
            ) + coord_fixed(ratio=1
            ) + theme_bw() 
    
    if (length(unique(dfPlot$Cluster)) > 15){
   plotList[[tag]] <- plotList[[tag]] + theme(legend.position = "none")
}
            
            FNbase <- paste0(tag, VersionPdfExt)
            FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
            FNrel <- paste0("report_figures/", FNbase)
            
            pdf(FN)
                print(plotList[[tag]])
            dev.off()
            
            figLegend <- paste0(
                "**Figure ", 
                figureCount, 
                ":** ",
                " Sample-level UMAPs. Estimated cell-cylce phase color-coded. Download a pdf of this figure [here](", FNrel,")."
            )
            
            figureCount <- figureCount + 1
            
            NewChnk <- paste0(
                paste("#### ", tag),
                "\n```{r CC_UMAP_",
                tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
                figLegend,"'}\n",
                "\n",
                "\n print(plotList[['",tag,"']])",
                "\n cat(  '\n')",
                "\n\n\n```\n"   
            )
            
            chnkVec <- c(
                chnkVec,
                NewChnk
            )
            
            
    
    
    
    
}

## Done making one umap plot per sample                                      ##
###############################################################################


###############################################################################
## Add cluster dendrogram by sample                                          ##

if (length(unique(OsC@meta.data$sampleID)) > 3){
  library(ggtree)
  Idents(OsC) <- "sampleID"
  OsC <- BuildClusterTree(OsC)
      
  tag <- paste0("Sample_Dendrogram")
      
  OsC@tools$BuildClusterTree$tip.label <- paste0("C", OsC@tools$BuildClusterTree$tip.label)
      
  plotList[[tag]]  <- ggplot(OsC@tools$BuildClusterTree
      ) + geom_tree(
      ) + theme_tree(
      ) + geom_tiplab(
      ) + labs(title=tag
      ) + theme(
        panel.border = element_rect(colour = "black", fill=NA, size=1),
        axis.title.x=element_blank(),
        plot.title = element_text(hjust = 0.5, size = 12)
      )  + xlim(0,1.2*max(OsC@tools$BuildClusterTree[[2]]))
 
      
  ## Save to file ##
  FNbase <- paste0(tag,".", VersionPdfExt)
  FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
  FNrel <- paste0("report_figures/", FNbase)
      
  pdf(FN)
      print(plotList[[tag]])
  dev.off()
      
  figLegend <- paste0(
      "**Figure ", 
          figureCount, 
          ":** ",
          " Clusterplot dendrogram by sample ID. ","A pdf of this figure can be downloaded [here](",FNrel,")."
      )
      
      
      NewChnk <- paste0(
          "#### SampleID Dendrogram",
          "\n```{r ", tag, "results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
          figLegend,"'}\n",
          "\n",
          "\n print(plotList[['",tag,"']])",
          "\n cat(  '\n')",
          "\n\n\n```\n"   
      )
      
      chnkVec <- c(
          chnkVec,
          NewChnk
      )
      
      
      figureCount <- figureCount + 1
}

## Done by sample                                                            ##
###############################################################################
            
###############################################################################
## Add cluster dendrogram by cluster                                         ##
library(ggtree)
Idents(OsC) <- "seurat_clusters"
OsC <- BuildClusterTree(OsC)
    
tag <- paste0("Cluster_Dendrogram")
    
OsC@tools$BuildClusterTree$tip.label <- paste0("C", OsC@tools$BuildClusterTree$tip.label)
    
plotList[[tag]]  <- ggplot(OsC@tools$BuildClusterTree
    ) + geom_tree(
    ) + theme_tree(
    ) + geom_tiplab(
    ) + labs(title=tag
    ) + theme(
      panel.border = element_rect(colour = "black", fill=NA, size=1),
      axis.title.x=element_blank(),
      plot.title = element_text(hjust = 0.5, size = 12)
    )  + xlim(0,1.2*max(OsC@tools$BuildClusterTree[[2]]))
    
    #+ xlim(-1,1.2*max(OsC@tools$BuildClusterTree$edge)) 
    
    
## Save to file ##
FNbase <- paste0(tag,".", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
    
pdf(FN)
    print(plotList[[tag]])
dev.off()
    
figLegend <- paste0(
    "**Figure ", 
        figureCount, 
        ":** ",
        " Clusterplot dendrogram. ","A pdf of this figure can be downloaded [here](",FNrel,")."
    )
    
    
    NewChnk <- paste0(
        "#### Cluster Dendrogram",
        "\n```{r ", tag, "results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
        figLegend,"'}\n",
        "\n",
        "\n print(plotList[['",tag,"']])",
        "\n cat(  '\n')",
        "\n\n\n```\n"   
    )
    
    chnkVec <- c(
        chnkVec,
        NewChnk
    )
    
    
    figureCount <- figureCount + 1

## Done integraed analysis                                                   ##
###############################################################################
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))

CellCyclePhase_All_Samples

**Figure 11:**  UMAP showing all cells from all samples together with the estimated cell-cycle phase color-coded. Download a pdf of this figure [here](report_figures/CellCyclePhase_All_Samples.V20201005.pdf).

Figure 11: UMAP showing all cells from all samples together with the estimated cell-cycle phase color-coded. Download a pdf of this figure here.

tSNE_All_Samples

**Figure 12:**  tSNE showing all cells from all samples together. The esimated cell-cycle phase is color coded. Download a pdf of this figure [here](report_figures/tSNE_All_Samples.V20201005.pdf).

Figure 12: tSNE showing all cells from all samples together. The esimated cell-cycle phase is color coded. Download a pdf of this figure here.

UMAP_CC_plot_by_E18_5_1

**Figure 13:**  Sample-level UMAPs. Estimated cell-cylce phase color-coded. Download a pdf of this figure [here](report_figures/UMAP_CC_plot_by_E18_5_1.V20201005.pdf).

Figure 13: Sample-level UMAPs. Estimated cell-cylce phase color-coded. Download a pdf of this figure here.

UMAP_CC_plot_by_PD4_2

**Figure 14:**  Sample-level UMAPs. Estimated cell-cylce phase color-coded. Download a pdf of this figure [here](report_figures/UMAP_CC_plot_by_PD4_2.V20201005.pdf).

Figure 14: Sample-level UMAPs. Estimated cell-cylce phase color-coded. Download a pdf of this figure here.

UMAP_CC_plot_by_PD7_1

**Figure 15:**  Sample-level UMAPs. Estimated cell-cylce phase color-coded. Download a pdf of this figure [here](report_figures/UMAP_CC_plot_by_PD7_1.V20201005.pdf).

Figure 15: Sample-level UMAPs. Estimated cell-cylce phase color-coded. Download a pdf of this figure here.

Cluster Dendrogram

**Figure 16:**  Clusterplot dendrogram. A pdf of this figure can be downloaded [here](report_figures/Cluster_Dendrogram..V20201005.pdf).

Figure 16: Clusterplot dendrogram. A pdf of this figure can be downloaded here.

Barchart N cells and Percent in Clusters

if you could prepare a bar-shape graph with the %of cells clusters representing our populations (like in the Nat Med)

###############################################################################
## Create datatable for plotting                                             ##


## This plotting procedure requires three sets: the sets cellTypeIDs, clusterIDs, cellTypeIDs
## level1ID, level2ID, level3ID

sampleIDs <- unique(OsC@meta.data$sampleID)
#Obio@parameterList$singleCellSeuratMtCutoff <- 20

if (is.null(Obio@parameterList$singleCellSeuratMtCutoff)){
    Obio@parameterList$singleCellSeuratMtCutoff <- rep(10, length(sampleIDs))    
} else if (length(Obio@parameterList$singleCellSeuratMtCutoff) == 1){
    Obio@parameterList$singleCellSeuratMtCutoff <- rep(
        Obio@parameterList$singleCellSeuratMtCutoff, 
        length(sampleIDs)
    )
} else if (length(Obio@parameterList$singleCellSeuratMtCutoff) == length(sampleIDs)){
    Obio@parameterList$singleCellSeuratMtCutoff <- Obio@parameterList$singleCellSeuratMtCutoff
} else {
    print("Can't determine mitochondrial cut off. ")
    stop()
}

sampleIDs <- unique(OsC@meta.data$sampleID)
clusterIDs <- unique(OsC@meta.data[,Obio@parameterList$singleCellClusterString])

if (length(grep("cellIdent", names(OsC@meta.data))) == 0){
  OsC@meta.data[["cellIdent"]] <- "All"
}

cellTypeIDs <- unique(OsC@meta.data[,"cellIdent"])

dfTemp <- OsC@meta.data

if (length(grep("^cellIdent$", names(dfTemp))) == 0){
  dfTemp[["cellIdent"]] <- "All"
}

dfTemp <- dfTemp[dfTemp$percent.mt <= max(Obio@parameterList$singleCellSeuratMtCutoff), ]
dfTemp[["cellID"]] <- row.names(dfTemp)
dfTemp <- unique(dfTemp[,c("cellID", "sampleID", Obio@parameterList$singleCellClusterString, "cellIdent")])
names(dfTemp) <- gsub(Obio@parameterList$singleCellClusterString, "Cluster", names(dfTemp) )
names(dfTemp) <- gsub(Obio@parameterList$singleCellClusterString, "Cluster", names(dfTemp) )
 dfTemp <- unique(dfTemp[,c("cellID", "sampleID", "Cluster", "cellIdent")])
 
dfRes <- dfTemp
dfRes$cellID <- NULL
row.names(dfRes) <- NULL
dfRes <- unique(dfRes)
dfRes[["N_cells"]] <- 0


for (i in 1:nrow(dfRes)){
  dfRes[i, "N_cells"] <- nrow(dfTemp[dfTemp$sampleID == dfRes[i,"sampleID"] & dfTemp$Cluster == dfRes[i,"Cluster"] & dfTemp$cellIdent == dfRes[i,"cellIdent"], ])
}




## Calculate cluster percentages per celltypeID ##
dfRes[["Perc_cells"]] <- 0
for (i in 1:length(cellTypeIDs)){
  dfResTemp2 <- dfRes[dfRes$cellIdent == cellTypeIDs[i], ]
  tempCluster <- as.vector(unique(dfResTemp2$Cluster))
  
  for (j in 1:length(tempCluster)){
    dfResTemp3 <- dfResTemp2[dfResTemp2$Cluster == tempCluster[j],]
    NclusterTotal <- sum(dfResTemp3[, "N_cells"])
    dfResTemp3[,"Perc_cells"] <- round(dfResTemp3[,"N_cells"]/NclusterTotal, 4)*100
    
    
    if (j ==1){
      dfRes3 <- dfResTemp3
    } else {
      dfRes3 <- rbind(dfResTemp3, dfRes3)
    }
  }
  
  if (i ==1){
    dfRes4 <- dfRes3
  } else {
    dfRes4 <- rbind(dfRes3, dfRes4)
  }
  
}
 
dfRes <- dfRes4

plotListNcells <- list()
plotListPercent <- list()
chnkVec <- as.vector(NULL, mode = "character")

for (i in 1:length(cellTypeIDs)){
  #############################################################################
  ## Create cell number plot                                                 ##
  tag <- cellTypeIDs[i]
  dfResTemp <- dfRes[dfRes$cellIdent == cellTypeIDs[i], ]
  
  ## Calculate percentages for this subset ##
  
  plotListNcells[[tag]] <- ggplot(
    ) + geom_bar(aes(y = N_cells, x = Cluster, fill = sampleID), data = dfResTemp, stat="identity"
    ) + labs(title=tag, x="Cluster", y = "Cell Count"
    ) +  theme(
      panel.border = element_rect(colour = "black", fill=NA, size=1),
      axis.title.x=element_blank(),
      plot.title = element_text(hjust = 0.5, size = 12)
    ) + coord_flip()
  
  ###########################################################################
  ## Save plot to file                                                     ##
  FNbase <- paste0(tag,".Ncells", VersionPdfExt)
  FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
  FNrel <- paste0("report_figures/", FNbase)
    
  pdf(FN)
      print(plotListNcells[[tag]])
  dev.off()
  ##                                                                       ##
  ###########################################################################
    
  ###########################################################################
  ## Add to chunk                                                          ##
  figCap <- paste0(
      "**Figure ",
      figureCount,
      "A:** Cell Count in each cluster for ", 
      tag,
      "Download a pdf of this figure [here](", FNrel, "). "
  )
  
  NewChnk <- paste0(
    paste0("#### Barchart_ ", tag),
        "\n```{r Barchart-",tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
        "\n",
        "\n print(plotListNcells[['",tag,"']])",
        "\n cat(  '\n')",
        "\n\n\n```\n"   
  )
    
  chnkVec <- c(
      chnkVec,
      NewChnk
  )
  ## Done adding                                                             ##
  #############################################################################
  
  #############################################################################
  ## Add percentage plot                                                     ##
  plotListPercent[[tag]] <- ggplot(
    ) + geom_bar(aes(x = Cluster, y = Perc_cells, fill = sampleID), data = dfResTemp, stat="identity"
    ) + labs(title=tag, x="Cluster", y = "Percent Cells"
    ) +  theme(
      panel.border = element_rect(colour = "black", fill=NA, size=1),
      axis.title.x=element_blank(),
      plot.title = element_text(hjust = 0.5, size = 12)
    ) +  coord_flip()
  
  ###########################################################################
  ## Save plot to file                                                     ##
  FNbase <- paste0(tag, ".percent.cells",VersionPdfExt)
  FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
  FNrel <- paste0("report_figures/", FNbase)
    
  pdf(FN)
      print(plotListPercent[[tag]])
  dev.off()
  ##                                                                       ##
  ###########################################################################
    
  ###########################################################################
  ## Add to chunk                                                          ##
  figCap <- paste0(
      "**Figure ",
      figureCount,
      "B:** Cell percentages in each cluster for ", 
      tag,
      "Download a pdf of this figure [here](", FNrel, "). "
  )
  
  NewChnk <- paste0(
        "\n```{r Barchart-percent_",tag,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",figCap,"'}\n",
        "\n",
        "\n print(plotListPercent[['",tag,"']])",
        "\n cat(  '\n')",
        "\n\n\n```\n"   
  )
    
  chnkVec <- c(
      chnkVec,
      NewChnk
  )
  ## Done adding percentage plot                                             ##
  #############################################################################
  
  
  figureCount <- figureCount + 1
}

## Done creating data table                                                  ##
###############################################################################
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))

Barchart_ All

**Figure 17A:** Cell Count in each cluster for AllDownload a pdf of this figure [here](report_figures/All.Ncells.V20201005.pdf).

Figure 17A: Cell Count in each cluster for AllDownload a pdf of this figure here.

**Figure 17B:** Cell percentages in each cluster for AllDownload a pdf of this figure [here](report_figures/All.percent.cells.V20201005.pdf).

Figure 17B: Cell percentages in each cluster for AllDownload a pdf of this figure here.

###############################################################################
## Identify conserved markers                                                ##

#To identify canonical cell type marker genes that are conserved across conditions, we provide the FindConservedMarkers function. This function performs differential gene expression testing for each dataset/group and combines the p-values using meta-analysis methods from the MetaDE R package. For example, we can calculated the genes that are conserved markers irrespective of stimulation condition in cluster 6 (NK cells).

# DefaultAssay(immune.combined) <- "RNA"
# markers <- FindConservedMarkers(OsC, ident.1 = 6, grouping.var = "orig.ident", verbose = FALSE)
# head(markers)
# 
# markers <- FindConservedMarkers(OsC,  grouping.var = "orig.ident", verbose = FALSE)
# head(markers)

## Done identify conserved markers                                           ##
###############################################################################

##################
## Bespoke                                                                   ##
DefaultAssay(OsC) <- "RNA"
Idents(OsC) <- Obio@parameterList$singleCellClusterString

if (!is.null(Obio@parameterList$DGEbyCluster)){
    for (i in 1:length(Obio@parameterList$DGEbyCluster)){
        Obio@enrichmentList[[names(Obio@parameterList$DGEbyCluster)[i]]] <- FindMarkers(
            object = OsC, 
            ident.1 = as.vector(unlist(Obio@parameterList$DGEbyCluster[[i]][1])),
            ident.2 = as.vector(unlist(Obio@parameterList$DGEbyCluster[[i]][2])),
            min.pct = 0.05,
            logfc.threshold = 0
        )
    }
}
        



## Done bespoke                                                              ##
###############################################################################

## Done finding all markers                                                  ##
###############################################################################

Category Enrichment Scatterplots

# save(Obio, 
#      file = paste0(
#          Obio@parameterList$localWorkDir,
#          Obio@parameterList$project_id,
#          ".temp.bioLOGIC.Robj"
#      )
# )

#print("Obio Object saved.")

save(OsC,
    file = paste0(
         Obio@parameterList$localWorkDir,
         Obio@parameterList$project_id,
        ".Seurat.Robj"
     )
)

library(AUCell)
plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")

# Defined in the section above #

## This needs to become a gmt file ##
if (is.null(Obio@parameterList$catRefFile)){
    FNcat <- paste0(hpc.mount, "Projects/schaefera/tobias.ackels/360_scRNAseq_mm_10X_1M_neurons_20k/basedata/asl320.referenceCats.txt")
} else {
    FNcat <- Obio@parameterList$catRefFile
}

if (length(grep(".gmt$", FNcat)) > 0){
    print("Load gmt file. To be implemented.")
    stop()
} else {
    dfHeatmapGenes <- read.delim(
      FNcat,
      header = T,
      sep = "\t",
      stringsAsFactors = F
      
    )
    
    if (is.null(Obio@parameterList[["cat2DotplotList"]])){
        Obio@parameterList[["cat2DotplotList"]] <- list()
    }
    
    if (is.null(Obio@parameterList[["cat2HMplotList"]])){
        Obio@parameterList[["cat2HMplotList"]] <- list()
    }
    
    
    

for (i in 1:ncol(dfHeatmapGenes)){
    genes <- as.vector(dfHeatmapGenes[2:nrow(dfHeatmapGenes),i])
    genes <- genes[genes %in% rownames(x = OsC@assays$RNA)]
    if (length(unique(genes)) < 61 |  (length(unique(genes)) > 0)){
        Obio@parameterList[["cat2DotplotList"]][[names(dfHeatmapGenes)[i]]] <- genes
    }
    
    if ((length(unique(genes)) < 501) |  (length(unique(genes)) > 2) ){
        Obio@parameterList[["cat2HMplotList"]] [[names(dfHeatmapGenes)[i]]] <- genes
    }
}
}


## Add transcription factors to dotplot ##
if (Obio@parameterList$geneIDcolumn != "mgi_symbol" & Obio@parameterList$geneIDcolumn != "hgnc_symbol") {
    queryGS <- "hgnc_symbol" 
} else {
    queryGS <- Obio@parameterList$geneIDcolumn
}


tempVec <- retrieve.gene.category.from.db(
    cat_id = "ag_lab_categories__10",
    password = db.pwd,
    gene.symbol = queryGS,
    user = Obio@parameterList$db.user,
    host = Obio@parameterList$host
)

###############################################################################
## If this is fish, translation is non-human or non-mouse, translation is necessary
if (queryGS != Obio@parameterList$geneIDcolumn){
    dfAnno <- Obio@dfGeneAnnotation
    dfAnno <- unique(dfAnno[,c("hgnc_symbol",Obio@parameterList$geneIDcolumn )])
    dfAnno <- dfAnno[dfAnno$hgnc_symbol != "", ]
    dfAnno <- dfAnno[dfAnno$hgnc_symbol %in% tempVec, ]
    tempVec <- unique(dfAnno[,Obio@parameterList$geneIDcolumn])
    tempVec <- tempVec[tempVec != ""]
}

dfHMG <- dfGeneralMarkers[dfGeneralMarkers$gene %in% tempVec, ]
dfHMGsel <- data.frame(dfHMG %>% group_by(cluster) %>% top_n(5, avg_diff))

Obio@parameterList[["cat2DotplotList"]][["Top5_TF_per_cluster_Markers"]] <- as.vector(unique(dfHMGsel$gene))

## Add cluster defining transcription factors to the collection ##
## For the dotplot ##


###############################################################################
## Get backdrop

exprMatrix <- as.matrix(OsC@assays$RNA@counts)
#logMat <- log10(exprMatrix+1)

# When using a Seurat object #
logMat <- data.frame(OsC[["RNA"]]@data)

## Load tSNE coordinates ##
cellsTsne <- data.frame(OsC@reductions$umap@cell.embeddings)

## done
FNbase <- paste0("CatScatter_Rankings", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
    

pdf(FN)
    cells_rankings <- AUCell_buildRankings(exprMatrix)
dev.off()

geneSets <- Obio@parameterList$cat2DotplotList

cells_AUC <- AUCell_calcAUC(geneSets, cells_rankings, aucMaxRank=nrow(cells_rankings)*0.05)

## Select thresholds ##


FNbase <- paste0("CatScatterHist", VersionPdfExt)
FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
FNrel <- paste0("report_figures/", FNbase)
            
pdf(FN)
    set.seed(123)
    cells_assignment <- AUCell_exploreThresholds(
        cells_AUC, 
        plotHist=TRUE, 
        nCores=1, 
        assign=TRUE
    )
dev.off()


## Add data to dfExpr ##

## Plot CatScatters ##
for (i in 1:length(Obio@parameterList$cat2DotplotList)){
    HMname <- names(Obio@parameterList$cat2DotplotList)[i]
    tag <- gsub("[.]", "_", HMname)
    
    FNbase <- paste0("CatScatterHist_", HMname, VersionPdfExt)
    FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    
    selectedThresholds <-  cells_assignment[[i]]$aucThr$thresholds 
    
    if ("minimumDens" %in% rownames(selectedThresholds)) {
        pThr <- selectedThresholds["minimumDens", "threshold"]
    } else if ("Global_k1" %in% rownames(selectedThresholds)){
        pThr <- selectedThresholds["Global_k1", "threshold"]
    } else {
        pThr <- selectedThresholds[1, "threshold"]
    }
    
    if (nrow(cellsTsne) > 15000){
        cex = 0.25
    } else if (nrow(cellsTsne) > 1000){
        cex = 0.5 
    } else {
        cex = 1
    }
    
    
    ## Get AUC matrix ##
    tSNE.df <- data.frame(cellsTsne, cell=rownames(cellsTsne))
    mAUC <- getAUC(cells_AUC)[HMname,rownames(tSNE.df)]
    dfAUC <- data.frame(mAUC)
    dfAUC[["cellID"]] <- row.names(dfAUC)
    dfAUC <- merge(dfAUC, tSNE.df, by.x = "cellID", by.y = "cell")
    
    dfDocAUC <- unique(dfAUC[,c("cellID", "mAUC")])
    dfDocAUC[["cat"]] <- paste0("Cat_",tag) 
    if (i == 1){
      dfResAUC <- dfDocAUC
    } else {
      dfResAUC <- rbind(
        dfResAUC, 
        dfDocAUC
      )
    }
    
    input <- list(
        "x_axis" = "UMAP1",
        "y_axis" = "UMAP2",
        "gene" = HMname
    )
    #dotsize <- cex
    
    legendNote <- paste0(
            " The following genes of this dataset are represented in this figure: ",
            paste0(sort(Obio@parameterList$cat2DotplotList[[i]]), collapse = ", ")
        )
    
     plotList[[tag]] <- ggplot(data = dfAUC, aes(x=UMAP_1, y=UMAP_2, color = mAUC)
            )+ geom_point( shape=16, size = dotsize
            ) + scale_color_gradient("AUC", low="grey", high="darkblue"
            ) + xlab(input$x_axis) + ylab(input$y_axis)  +  theme(
                axis.text.y   = element_text(size=8),
                axis.text.x   = element_text(size=8),
                axis.title.y  = element_text(size=8),
                axis.title.x  = element_text(size=8),
                axis.line = element_line(colour = "black"),
                panel.border = element_rect(colour = "black", fill=NA, size=1),
                plot.title = element_text(hjust = 0.5, size = 12)
            )+ ggtitle(paste0("Category: ", input$gene)
            ) + coord_fixed(ratio = 1
            ) + theme_bw() 
     
     
     #+ theme(legend.position="none") 
     
    FNbase <- paste0("CatScatter", HMname, VersionPdfExt)
    FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        print(plotList[[tag]])
    dev.off()
    ## Create R markdown chunk ##
    figLegend <- paste0(
        "**Figure ", 
        figureCount, 
        "A:** Category Scatter showing gene category ", 
        HMname, ". ", legendNote, 
        ". Download a pdf of this figure [here](", FNrel,"). "
    )
            
            
   
            
    NewChnk <- paste0(
        "#### Category Feature Plot ",HMname,
                "\n```{r CatFeatPlot1_",
                i,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
                figLegend,"'}\n",
                "\n",
                "\n print(plotList[['",tag,"']])",
                "\n cat(  '\n')",
                "\n\n\n```\n"   
            )
          
        
    
    chnkVec <- c(
        chnkVec,
        NewChnk
    )
    
    ###########################################################################
    ## Add part B - dotplot                                                  ##
    DefaultAssay(OsC) <- "RNA"

    OsC@meta.data[["hmIdent2"]] <- paste0("C", OsC@meta.data[,Obio@parameterList$singleCellClusterString])
    
    levels <- paste0(
      "C",
      sort(unique(OsC@meta.data[,Obio@parameterList$singleCellClusterString]))
      )
    
    OsC@meta.data$hmIdent2 <- factor(OsC@meta.data$hmIdent2, levels=levels)
    
    Idents(OsC) <- "hmIdent2"
    
    
    
    
   
        HMname <- paste0("Dotplot_", names(Obio@parameterList$cat2DotplotList)[i])
        tag <- gsub("[.]", "_", HMname)
        
        dpGenes <- unique(Obio@parameterList$cat2DotplotList[[i]])
        legendNote <- paste0("The following genes were found in this category and the single-cell dataset: ", paste0(dpGenes, collapse=", "))
        
        plotList[[tag]] <- DotPlotSB(
            object = OsC,
            features = dpGenes,
            #cols = cols,
            group.by = NULL,
            split.by = NULL,
            dot.scale = 4,
            col.min = 0,
            col.max = 5,
            #assay = "RNA"
        ) + ggtitle(gsub("_", "", tag)) + coord_fixed() + coord_flip()
        
        FNbase <- paste0(HMname, VersionPdfExt)
        FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
        FNrel <- paste0("report_figures/", FNbase)
        
        pdf(FN)
            print(plotList[[tag]])
        dev.off()
        ## Create R markdown chunk ##
        figLegend <- paste0(
            "**Figure ", 
            figureCount, 
            "B:** Dotplot showing gene category ", 
            HMname, ". ", legendNote, 
            ". Download a pdf of this figure [here](", FNrel,"). "
        )
                
                
        figureCount <- figureCount + 1 
                
        NewChnk <- paste0(
                    "\n```{r ",tag,
                    ", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
                    figLegend,"'}\n",
                    "\n",
                    "\n print(plotList[['",tag,"']])",
                    "\n cat(  '\n')",
                    "\n\n\n```\n"   
                )
              
            
        
        chnkVec <- c(
            chnkVec,
            NewChnk
        )
    
    
    ## Done adding dotplot                                                   ##
    ###########################################################################
    
    
}

Obio@dataTableList[["dfResAUC"]] <- dfResAUC
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))

Category Feature Plot Pre_meiotic_Niu_et_at_2020

**Figure 18A:** Category Scatter showing gene category Pre_meiotic_Niu_et_at_2020.  The following genes of this dataset are represented in this figure: 1500009L16Rik, 1810022K09Rik, 1810037I17Rik, Akap12, Aldoa, Ank3, Anp32a, Anp32b, Ap3b2, Apoe, Arid5b, Arpc1b, Ash2l, Atp5g1, Atrx, Bax, BC048679, Bcl7c, Brd8, Bst2, C1qbp, Ccna2, Ccnb1, Cdca3, Cdca8, Cdk4, Cenpa, Cenpf, Cep164, Cep89, Chchd10, Chd4, Cox7b, Cox8a, Crip1, Crip2, Csrp2, Dctpp1, Dek, Diaph1, Dnaja1, Dnajc8, Dnd1, Dnmt1, Dppa4, Dppa5a, Dut, Eif3a, Eif4ebp1, Eno1, Epcam, Etfb, Fam60a, Ftsj3, G3bp1, Gadd45gip1, Gapdh, Gar1, Gjb3, Glrx2, Gm10076, Gnai2, Gnl3, Gpx1, Gpx4, Gspt1, H2afv, Hba-x, Hbb-y, Hdgf, Hist1h2ap, Hmgb3, Hmgn1, Hmgn5, Hnrnpa1, Hnrnpa3, Hnrnpab, Hnrnpd, Hnrnpu, Hsp90ab1, Hspe1, Ifitm1, Ifitm2, Ifitm3, Incenp, Kdm1a, Klf2, L1td1, Lgals1, Lima1, Lyar, Mak16, Map1lc3b, Mcm3, Mcm7, Mif, Mki67, Mkrn1, Mrpl23, Mrps12, Msh2, Mt2, Mtf2, Mybbp1a, Mybl2, Myo10, Naca, Nap1l1, Ncl, Nhp2, Nme1, Nolc1, Npm1, Nudc, Parp1, Pdgfa, Peg3, Pgam1, Phb, Phb2, Phlda2, Pkm, Pmm1, Pou5f1, Ppp1r14b, Prc1, Prdx2, Prmt1, Psma7, Psmb3, Psmb5, Psme1, Psme2, Rad21, Ranbp1, Rangrf, Rcc2, Rest, Rhox5, Rhox6, Rhox9, Rnps1, Rpl18, Rpl24, Rpl30, Rpl6, Rpl8, Rplp0, Rps11, Rps13, Rps18, Rps3, Rrp9, Sall4, Selenoh, Serbp1, Set, Sf3b2, Sfpq, Sgo2a, Siva1, Slc25a4, Slc25a5, Smc1a, Smc2, Smc3, Sms, Snrpa1, Socs2, Sox2, Sptbn2, Srm, Ssbp4, Ssrp1, Tbrg1, Tcof1, Tdh, Tet1, Thrap3, Tmem256, Tomm40, Tomm5, Top2a, Tpm3, Trp53, Tuba1b, Tubb5, Txn1, Txnrd1, Ube2c, Ubtf, Ubxn1, Ung, Utf1, Vars, Wdr43, Ybx1, Zfp106, Zfp428. Download a pdf of this figure [here](report_figures/CatScatterPre_meiotic_Niu_et_at_2020.V20201005.pdf).

Figure 18A: Category Scatter showing gene category Pre_meiotic_Niu_et_at_2020. The following genes of this dataset are represented in this figure: 1500009L16Rik, 1810022K09Rik, 1810037I17Rik, Akap12, Aldoa, Ank3, Anp32a, Anp32b, Ap3b2, Apoe, Arid5b, Arpc1b, Ash2l, Atp5g1, Atrx, Bax, BC048679, Bcl7c, Brd8, Bst2, C1qbp, Ccna2, Ccnb1, Cdca3, Cdca8, Cdk4, Cenpa, Cenpf, Cep164, Cep89, Chchd10, Chd4, Cox7b, Cox8a, Crip1, Crip2, Csrp2, Dctpp1, Dek, Diaph1, Dnaja1, Dnajc8, Dnd1, Dnmt1, Dppa4, Dppa5a, Dut, Eif3a, Eif4ebp1, Eno1, Epcam, Etfb, Fam60a, Ftsj3, G3bp1, Gadd45gip1, Gapdh, Gar1, Gjb3, Glrx2, Gm10076, Gnai2, Gnl3, Gpx1, Gpx4, Gspt1, H2afv, Hba-x, Hbb-y, Hdgf, Hist1h2ap, Hmgb3, Hmgn1, Hmgn5, Hnrnpa1, Hnrnpa3, Hnrnpab, Hnrnpd, Hnrnpu, Hsp90ab1, Hspe1, Ifitm1, Ifitm2, Ifitm3, Incenp, Kdm1a, Klf2, L1td1, Lgals1, Lima1, Lyar, Mak16, Map1lc3b, Mcm3, Mcm7, Mif, Mki67, Mkrn1, Mrpl23, Mrps12, Msh2, Mt2, Mtf2, Mybbp1a, Mybl2, Myo10, Naca, Nap1l1, Ncl, Nhp2, Nme1, Nolc1, Npm1, Nudc, Parp1, Pdgfa, Peg3, Pgam1, Phb, Phb2, Phlda2, Pkm, Pmm1, Pou5f1, Ppp1r14b, Prc1, Prdx2, Prmt1, Psma7, Psmb3, Psmb5, Psme1, Psme2, Rad21, Ranbp1, Rangrf, Rcc2, Rest, Rhox5, Rhox6, Rhox9, Rnps1, Rpl18, Rpl24, Rpl30, Rpl6, Rpl8, Rplp0, Rps11, Rps13, Rps18, Rps3, Rrp9, Sall4, Selenoh, Serbp1, Set, Sf3b2, Sfpq, Sgo2a, Siva1, Slc25a4, Slc25a5, Smc1a, Smc2, Smc3, Sms, Snrpa1, Socs2, Sox2, Sptbn2, Srm, Ssbp4, Ssrp1, Tbrg1, Tcof1, Tdh, Tet1, Thrap3, Tmem256, Tomm40, Tomm5, Top2a, Tpm3, Trp53, Tuba1b, Tubb5, Txn1, Txnrd1, Ube2c, Ubtf, Ubxn1, Ung, Utf1, Vars, Wdr43, Ybx1, Zfp106, Zfp428. Download a pdf of this figure here.

**Figure 18B:** Dotplot showing gene category Dotplot_Pre_meiotic_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: Hist1h2ap, Hbb-y, Dppa5a, Hmgn5, Utf1, Ifitm3, Set, Cox8a, Slc25a5, Txn1, Rhox9, Slc25a4, Pou5f1, L1td1, Chchd10, Mt2, Rhox6, Gm10076, Selenoh, Ifitm1, Parp1, Rhox5, Atrx, Rest, Cox7b, Lgals1, Phlda2, Arpc1b, Akap12, Ybx1, 1810037I17Rik, Mif, Ifitm2, Tbrg1, Anp32a, Apoe, Cenpf, Cep164, Cenpa, C1qbp, Nolc1, Nhp2, Hba-x, Gnl3, Lyar, H2afv, Dek, Pmm1, Ncl, Tet1, Atp5g1, Gapdh, Cdk4, Hnrnpa1, Ung, BC048679, Dnajc8, Anp32b, Hmgn1, Tdh, Ppp1r14b, Glrx2, Rad21, Tubb5, Arid5b, Prc1, Gpx1, Sall4, Mybbp1a, Dnd1, Nudc, Srm, Hnrnpd, Socs2, Gpx4, Sox2, Serbp1, Snrpa1, Wdr43, Sf3b2, Prdx2, Mkrn1, Eif4ebp1, G3bp1, Trp53, Txnrd1, Rcc2, Epcam, Tuba1b, Bst2, Rps13, Cep89, Myo10, Prmt1, 1500009L16Rik, Rplp0, Pkm, Aldoa, Bax, Npm1, Hnrnpa3, Pgam1, Pdgfa, Tcof1, Map1lc3b, Mak16, Smc1a, Eno1, Gadd45gip1, Eif3a, Tpm3, Nme1, Tmem256, Gspt1, Dppa4, Peg3, 1810022K09Rik, Tomm5, Bcl7c, Lima1, Gjb3, Psme2, Hsp90ab1, Siva1, Kdm1a, Hnrnpu, Psmb3, Dctpp1, Rpl24, Cdca3, Psme1, Smc2, Crip1, Zfp106, Sfpq, Smc3, Hnrnpab, Tomm40, Ccnb1, Naca, Dut, Ftsj3, Csrp2, Rps18, Mcm3, Incenp, Ubtf, Psmb5, Thrap3, Msh2, Psma7, Vars, Ccna2, Rpl6, Nap1l1, Chd4, Mcm7, Ap3b2, Mtf2, Phb, Mybl2, Ranbp1, Gnai2, Ube2c, Ssbp4, Rps11, Top2a, Dnaja1, Rpl18, Zfp428, Diaph1, Phb2, Hspe1, Ash2l, Crip2, Klf2, Brd8, Rrp9, Etfb, Sgo2a, Sms, Dnmt1, Mki67, Rangrf, Sptbn2, Rpl30, Rpl8, Rps3, Mrpl23, Rnps1, Ank3, Hdgf, Gar1, Hmgb3, Ssrp1, Mrps12, Cdca8, Fam60a, Ubxn1. Download a pdf of this figure [here](report_figures/Dotplot_Pre_meiotic_Niu_et_at_2020.V20201005.pdf).

Figure 18B: Dotplot showing gene category Dotplot_Pre_meiotic_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: Hist1h2ap, Hbb-y, Dppa5a, Hmgn5, Utf1, Ifitm3, Set, Cox8a, Slc25a5, Txn1, Rhox9, Slc25a4, Pou5f1, L1td1, Chchd10, Mt2, Rhox6, Gm10076, Selenoh, Ifitm1, Parp1, Rhox5, Atrx, Rest, Cox7b, Lgals1, Phlda2, Arpc1b, Akap12, Ybx1, 1810037I17Rik, Mif, Ifitm2, Tbrg1, Anp32a, Apoe, Cenpf, Cep164, Cenpa, C1qbp, Nolc1, Nhp2, Hba-x, Gnl3, Lyar, H2afv, Dek, Pmm1, Ncl, Tet1, Atp5g1, Gapdh, Cdk4, Hnrnpa1, Ung, BC048679, Dnajc8, Anp32b, Hmgn1, Tdh, Ppp1r14b, Glrx2, Rad21, Tubb5, Arid5b, Prc1, Gpx1, Sall4, Mybbp1a, Dnd1, Nudc, Srm, Hnrnpd, Socs2, Gpx4, Sox2, Serbp1, Snrpa1, Wdr43, Sf3b2, Prdx2, Mkrn1, Eif4ebp1, G3bp1, Trp53, Txnrd1, Rcc2, Epcam, Tuba1b, Bst2, Rps13, Cep89, Myo10, Prmt1, 1500009L16Rik, Rplp0, Pkm, Aldoa, Bax, Npm1, Hnrnpa3, Pgam1, Pdgfa, Tcof1, Map1lc3b, Mak16, Smc1a, Eno1, Gadd45gip1, Eif3a, Tpm3, Nme1, Tmem256, Gspt1, Dppa4, Peg3, 1810022K09Rik, Tomm5, Bcl7c, Lima1, Gjb3, Psme2, Hsp90ab1, Siva1, Kdm1a, Hnrnpu, Psmb3, Dctpp1, Rpl24, Cdca3, Psme1, Smc2, Crip1, Zfp106, Sfpq, Smc3, Hnrnpab, Tomm40, Ccnb1, Naca, Dut, Ftsj3, Csrp2, Rps18, Mcm3, Incenp, Ubtf, Psmb5, Thrap3, Msh2, Psma7, Vars, Ccna2, Rpl6, Nap1l1, Chd4, Mcm7, Ap3b2, Mtf2, Phb, Mybl2, Ranbp1, Gnai2, Ube2c, Ssbp4, Rps11, Top2a, Dnaja1, Rpl18, Zfp428, Diaph1, Phb2, Hspe1, Ash2l, Crip2, Klf2, Brd8, Rrp9, Etfb, Sgo2a, Sms, Dnmt1, Mki67, Rangrf, Sptbn2, Rpl30, Rpl8, Rps3, Mrpl23, Rnps1, Ank3, Hdgf, Gar1, Hmgb3, Ssrp1, Mrps12, Cdca8, Fam60a, Ubxn1. Download a pdf of this figure here.

Category Feature Plot Pre_Leptotene_Niu_et_at_2020

**Figure 19A:** Category Scatter showing gene category Pre_Leptotene_Niu_et_at_2020.  The following genes of this dataset are represented in this figure: Actb, Aldh7a1, Aldoa, Alyref, Anp32e, Arpc1b, Asna1, Atox1, Atp5b, Atp5c1, Atp5g1, Atp5g2, Atp5g3, Atp5j2, Atpif1, Bex3, Bst2, Ccnb1, Ccnb2, Ccnd1, Cct4, Cct5, Cct7, Cdc20, Cdkn1a, Cenpa, Chchd10, Cks1b, Cox6b2, Cox7a1, Cox7b, Cpne5, Crip1, Cyc1, Cycs, Cyct, Dbi, Dctpp1, Ddx39, Ddx39b, Dnd1, Dppa3, Dppa5a, Dusp9, Dut, Eci1, Eef1g, Emb, Exosc8, Fdps, Fkbp4, Fmc1, Fth1, Galk1, Gapdh, Glrx2, Gpx1, Gpx4, Grn, H2afv, H2afy, H2afz, Hint2, Hmgb2, Hmgb3, Hmgn1, Hmgn2, Hnrnpab, Hspe1, Iah1, Idh3g, Ifitm1, Ifitm2, Ifitm3, Isyna1, Jund, Lgals1, Lsm4, Lsm7, Mcm3, Mdh2, Mdk, Mfge8, Mif, Mrpl23, Mrpl42, Mrps21, Msx1, Msx2, Myl12a, Ndufa13, Ndufab1, Ndufb2, Ndufb4, Ndufb8, Ndufs6, Nt5dc2, Nudc, Nudt21, Nxt1, Park7, Pdpn, Pfn1, Phlda2, Plp2, Pmm1, Pou5f1, Ppia, Ppp1r14b, Prdx2, Psat1, Psma3, Psma4, Psma7, Psmb5, Psme1, Psme2, Ptov1, Ran, Rbpms, Rcc2, Rec8, Rexo2, Rhox5, Rhox6, Rhox9, Rpa2, Rpa3, Rpl10, Rpl11, Rpl13, Rpl13a, Rpl18, Rpl24, Rpl27, Rpl3, Rpl34, Rpl36, Rpl36a, Rpl36al, Rpl37, Rpl39, Rpl41, Rpl6, Rpl8, Rpl9, Rps11, Rps15, Rps17, Rps18, Rps2, Rps26, Rps27l, Rps3, Rps3a1, Rps4x, Rps6, Rps8, Sall4, Sarnp, Sct, Sdhaf4, Selenoh, Serbp1, Serf2, Sh3bgrl3, Siva1, Slc25a5, Snrpb, Snrpd1, Snrpg, Sox2, Spag7, Stmn1, Stra8, Sumo2, Taldo1, Tcea3, Tceal9, Tecr, Tex19.1, Tmem160, Tmem256, Trim28, Tspo, Tuba1a, Tuba1b, Tuba3a, Tubb5, Txn1, U2af1, Ube2l3, Uqcrb, Uqcrc1, Usmg5, Utf1, Wdr18, Wdr6, Zfp462. Download a pdf of this figure [here](report_figures/CatScatterPre_Leptotene_Niu_et_at_2020.V20201005.pdf).

Figure 19A: Category Scatter showing gene category Pre_Leptotene_Niu_et_at_2020. The following genes of this dataset are represented in this figure: Actb, Aldh7a1, Aldoa, Alyref, Anp32e, Arpc1b, Asna1, Atox1, Atp5b, Atp5c1, Atp5g1, Atp5g2, Atp5g3, Atp5j2, Atpif1, Bex3, Bst2, Ccnb1, Ccnb2, Ccnd1, Cct4, Cct5, Cct7, Cdc20, Cdkn1a, Cenpa, Chchd10, Cks1b, Cox6b2, Cox7a1, Cox7b, Cpne5, Crip1, Cyc1, Cycs, Cyct, Dbi, Dctpp1, Ddx39, Ddx39b, Dnd1, Dppa3, Dppa5a, Dusp9, Dut, Eci1, Eef1g, Emb, Exosc8, Fdps, Fkbp4, Fmc1, Fth1, Galk1, Gapdh, Glrx2, Gpx1, Gpx4, Grn, H2afv, H2afy, H2afz, Hint2, Hmgb2, Hmgb3, Hmgn1, Hmgn2, Hnrnpab, Hspe1, Iah1, Idh3g, Ifitm1, Ifitm2, Ifitm3, Isyna1, Jund, Lgals1, Lsm4, Lsm7, Mcm3, Mdh2, Mdk, Mfge8, Mif, Mrpl23, Mrpl42, Mrps21, Msx1, Msx2, Myl12a, Ndufa13, Ndufab1, Ndufb2, Ndufb4, Ndufb8, Ndufs6, Nt5dc2, Nudc, Nudt21, Nxt1, Park7, Pdpn, Pfn1, Phlda2, Plp2, Pmm1, Pou5f1, Ppia, Ppp1r14b, Prdx2, Psat1, Psma3, Psma4, Psma7, Psmb5, Psme1, Psme2, Ptov1, Ran, Rbpms, Rcc2, Rec8, Rexo2, Rhox5, Rhox6, Rhox9, Rpa2, Rpa3, Rpl10, Rpl11, Rpl13, Rpl13a, Rpl18, Rpl24, Rpl27, Rpl3, Rpl34, Rpl36, Rpl36a, Rpl36al, Rpl37, Rpl39, Rpl41, Rpl6, Rpl8, Rpl9, Rps11, Rps15, Rps17, Rps18, Rps2, Rps26, Rps27l, Rps3, Rps3a1, Rps4x, Rps6, Rps8, Sall4, Sarnp, Sct, Sdhaf4, Selenoh, Serbp1, Serf2, Sh3bgrl3, Siva1, Slc25a5, Snrpb, Snrpd1, Snrpg, Sox2, Spag7, Stmn1, Stra8, Sumo2, Taldo1, Tcea3, Tceal9, Tecr, Tex19.1, Tmem160, Tmem256, Trim28, Tspo, Tuba1a, Tuba1b, Tuba3a, Tubb5, Txn1, U2af1, Ube2l3, Uqcrb, Uqcrc1, Usmg5, Utf1, Wdr18, Wdr6, Zfp462. Download a pdf of this figure here.

**Figure 19B:** Dotplot showing gene category Dotplot_Pre_Leptotene_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: Stra8, Phlda2, Tubb5, Tuba1b, Ifitm1, Mif, Rhox5, Rhox9, Fdps, Ifitm2, Sct, Rps2, Atp5g1, Gpx4, Dut, Lgals1, Dusp9, Hspe1, Dppa5a, Rec8, Rps8, Rhox6, Psmb5, Selenoh, Atpif1, Atp5b, Ndufb2, Gapdh, Cox7a1, Pou5f1, Rps3, Ptov1, Atp5c1, Tmem256, Rps4x, Rps3a1, Pdpn, Rpa2, Chchd10, Plp2, Hmgn1, Psme2, Mdk, Mrpl23, Ran, Prdx2, Ccnd1, H2afv, Psme1, Tuba3a, Ifitm3, Glrx2, Snrpg, Nt5dc2, Snrpb, Eef1g, Rpl24, Cenpa, Dbi, Rpl27, Rpl41, Psat1, Rpl18, Ndufa13, H2afz, Cct5, Emb, Sh3bgrl3, Stmn1, Crip1, Hmgb2, Ppia, Rps15, Tceal9, Dnd1, Hmgn2, Grn, Txn1, Psma3, Idh3g, Cyct, Mrpl42, Ppp1r14b, Rps17, Aldoa, Rpa3, Slc25a5, Fth1, Tecr, Dctpp1, Sumo2, Tuba1a, Rps6, Cct4, Rpl37, Dppa3, Tex19.1, Rpl3, Spag7, Ccnb2, Cox7b, Nudc, Cpne5, Hmgb3, Lsm7, Uqcrc1, Serbp1, Rpl36, Cyc1, Aldh7a1, Atp5g3, Myl12a, Zfp462, Rpl10, Mfge8, Pfn1, H2afy, Ndufs6, Rpl36a, Atp5g2, Utf1, Park7, Cox6b2, Alyref, Pmm1, Ndufb8, Rpl36al, Bex3, Anp32e, Gpx1, Msx1, Cycs, Usmg5, Mdh2, Rps27l, Serf2, Cdkn1a, Rpl8, Tcea3, Psma4, Cct7, Rcc2, Asna1, Rpl13a, Sox2, Ddx39b, Uqcrb, Wdr6, Sall4, Fkbp4, Trim28, Rpl13, Rexo2, Bst2, Ddx39, Cdc20, Galk1, Isyna1, Rps26, Taldo1, Rps11, Sarnp, Rpl9, Sdhaf4, U2af1, Tmem160, Jund, Siva1, Msx2, Eci1, Cks1b, Ccnb1, Rbpms, Mcm3, Psma7, Rpl39, Hnrnpab, Ndufb4, Nudt21, Rps18, Rpl11, Nxt1, Exosc8, Actb, Wdr18, Snrpd1, Arpc1b, Ndufab1, Iah1, Hint2, Rpl34, Fmc1, Rpl6, Atp5j2, Mrps21, Tspo, Ube2l3, Lsm4, Atox1. Download a pdf of this figure [here](report_figures/Dotplot_Pre_Leptotene_Niu_et_at_2020.V20201005.pdf).

Figure 19B: Dotplot showing gene category Dotplot_Pre_Leptotene_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: Stra8, Phlda2, Tubb5, Tuba1b, Ifitm1, Mif, Rhox5, Rhox9, Fdps, Ifitm2, Sct, Rps2, Atp5g1, Gpx4, Dut, Lgals1, Dusp9, Hspe1, Dppa5a, Rec8, Rps8, Rhox6, Psmb5, Selenoh, Atpif1, Atp5b, Ndufb2, Gapdh, Cox7a1, Pou5f1, Rps3, Ptov1, Atp5c1, Tmem256, Rps4x, Rps3a1, Pdpn, Rpa2, Chchd10, Plp2, Hmgn1, Psme2, Mdk, Mrpl23, Ran, Prdx2, Ccnd1, H2afv, Psme1, Tuba3a, Ifitm3, Glrx2, Snrpg, Nt5dc2, Snrpb, Eef1g, Rpl24, Cenpa, Dbi, Rpl27, Rpl41, Psat1, Rpl18, Ndufa13, H2afz, Cct5, Emb, Sh3bgrl3, Stmn1, Crip1, Hmgb2, Ppia, Rps15, Tceal9, Dnd1, Hmgn2, Grn, Txn1, Psma3, Idh3g, Cyct, Mrpl42, Ppp1r14b, Rps17, Aldoa, Rpa3, Slc25a5, Fth1, Tecr, Dctpp1, Sumo2, Tuba1a, Rps6, Cct4, Rpl37, Dppa3, Tex19.1, Rpl3, Spag7, Ccnb2, Cox7b, Nudc, Cpne5, Hmgb3, Lsm7, Uqcrc1, Serbp1, Rpl36, Cyc1, Aldh7a1, Atp5g3, Myl12a, Zfp462, Rpl10, Mfge8, Pfn1, H2afy, Ndufs6, Rpl36a, Atp5g2, Utf1, Park7, Cox6b2, Alyref, Pmm1, Ndufb8, Rpl36al, Bex3, Anp32e, Gpx1, Msx1, Cycs, Usmg5, Mdh2, Rps27l, Serf2, Cdkn1a, Rpl8, Tcea3, Psma4, Cct7, Rcc2, Asna1, Rpl13a, Sox2, Ddx39b, Uqcrb, Wdr6, Sall4, Fkbp4, Trim28, Rpl13, Rexo2, Bst2, Ddx39, Cdc20, Galk1, Isyna1, Rps26, Taldo1, Rps11, Sarnp, Rpl9, Sdhaf4, U2af1, Tmem160, Jund, Siva1, Msx2, Eci1, Cks1b, Ccnb1, Rbpms, Mcm3, Psma7, Rpl39, Hnrnpab, Ndufb4, Nudt21, Rps18, Rpl11, Nxt1, Exosc8, Actb, Wdr18, Snrpd1, Arpc1b, Ndufab1, Iah1, Hint2, Rpl34, Fmc1, Rpl6, Atp5j2, Mrps21, Tspo, Ube2l3, Lsm4, Atox1. Download a pdf of this figure here.

Category Feature Plot Leptone_Niu_et_at_2020

**Figure 20A:** Category Scatter showing gene category Leptone_Niu_et_at_2020.  The following genes of this dataset are represented in this figure: 1110004F10Rik, Abraxas1, Actb, Aldoa, Alkbh5, Alyref, Anp32e, Arf4, Arpc4, Atp5b, Atpif1, Bex2, Bst2, Btg1, C1qbp, Cbx3, Ccnb1, Ccnd1, Ccnh, Cct4, Cct7, Cd63, Cdc20, Cdc26, Cdc42, Cdkn1c, Cdkn2a, Ckb, Cnpy2, Cox20, Cox6b1, Cox8a, Cryzl1, Dbi, Dnaja1, Dnajc8, Dppa4, Dstn, Dusp9, Dut, Echs1, Eef1g, Eif1ad, Eif3i, Eif4a1, Emc6, Emd, Esco2, Etfb, Ezh2, Fam32a, Fam60a, Fignl1, Fis1, Fth1, Galk1, Gapdh, Gata2, Gmps, Gnb1, Gng13, Gpi1, Gpx4, Grpel1, Gsdme, Hbb-bt, Hbb-y, Hccs, Hells, Hif1a, Hint2, Hist1h1b, Hmgb1, Hmgb2, Hmgn1, Hmmr, Hnrnpa1, Hnrnpf, Hnrnpk, Hpf1, Hprt, Idi1, Ifitm1, Ifitm2, Ilf2, Ino80e, Insig1, Iqsec1, Jun, Kdelr2, Lars, Ldha, Lgals1, Lgals7, Lig1, Lrrc8a, Marcksl1, Mcmbp, Mdk, Med21, Mif, Mphosph8, Mrpl23, Mrpl54, Mrps12, Mrps16, Mrps36, Myef2, Ndufa13, Ndufb2, Nenf, Nmb, Nudt21, Nup85, Nusap1, Pa2g4, Pdap1, Pdcd10, Pdcd4, Pdpn, Pex13, Pfn1, Pgam1, Phlda2, Pim1, Plekhf2, Polr2f, Pou5f1, Ppdpf, Prdx2, Ptges3, Ptma, Ptov1, Rbm15, Rhox5, Rhox6, Rhox9, Rnf2, Rpa3, Rpgrip1, Rpl24, Rpl27, Rpl39, Rpl6, Rps27rt, Rps3, Rrm2, Rsl1d1, Rsrc2, Runx1t1, Sarnp, Sdc4, Sdha, Selenoh, Selenok, Selenow, Slc24a5, Slc25a5, Slc39a1, Smc3, Snrpb, Snrpd1, Snrpg, Snrpn, Sparc, Spc25, Srrm2, Ssb, Stra8, Sumo2, Supt16, Swi5, Taldo1, Tcea1, Tex30, Thyn1, Tipin, Tkt, Tmem243, Tmem256, Tmem5, Tmem59, Tmsb4x, Tpi1, Trappc3, Tsen34, Tuba1a, Tuba1b, Tuba3a, Tubb5, Txnrd1, Ube2b, Ufd1, Uqcrb, Utf1, Vim, Ywhaz, Zmat2, Zwint. Download a pdf of this figure [here](report_figures/CatScatterLeptone_Niu_et_at_2020.V20201005.pdf).

Figure 20A: Category Scatter showing gene category Leptone_Niu_et_at_2020. The following genes of this dataset are represented in this figure: 1110004F10Rik, Abraxas1, Actb, Aldoa, Alkbh5, Alyref, Anp32e, Arf4, Arpc4, Atp5b, Atpif1, Bex2, Bst2, Btg1, C1qbp, Cbx3, Ccnb1, Ccnd1, Ccnh, Cct4, Cct7, Cd63, Cdc20, Cdc26, Cdc42, Cdkn1c, Cdkn2a, Ckb, Cnpy2, Cox20, Cox6b1, Cox8a, Cryzl1, Dbi, Dnaja1, Dnajc8, Dppa4, Dstn, Dusp9, Dut, Echs1, Eef1g, Eif1ad, Eif3i, Eif4a1, Emc6, Emd, Esco2, Etfb, Ezh2, Fam32a, Fam60a, Fignl1, Fis1, Fth1, Galk1, Gapdh, Gata2, Gmps, Gnb1, Gng13, Gpi1, Gpx4, Grpel1, Gsdme, Hbb-bt, Hbb-y, Hccs, Hells, Hif1a, Hint2, Hist1h1b, Hmgb1, Hmgb2, Hmgn1, Hmmr, Hnrnpa1, Hnrnpf, Hnrnpk, Hpf1, Hprt, Idi1, Ifitm1, Ifitm2, Ilf2, Ino80e, Insig1, Iqsec1, Jun, Kdelr2, Lars, Ldha, Lgals1, Lgals7, Lig1, Lrrc8a, Marcksl1, Mcmbp, Mdk, Med21, Mif, Mphosph8, Mrpl23, Mrpl54, Mrps12, Mrps16, Mrps36, Myef2, Ndufa13, Ndufb2, Nenf, Nmb, Nudt21, Nup85, Nusap1, Pa2g4, Pdap1, Pdcd10, Pdcd4, Pdpn, Pex13, Pfn1, Pgam1, Phlda2, Pim1, Plekhf2, Polr2f, Pou5f1, Ppdpf, Prdx2, Ptges3, Ptma, Ptov1, Rbm15, Rhox5, Rhox6, Rhox9, Rnf2, Rpa3, Rpgrip1, Rpl24, Rpl27, Rpl39, Rpl6, Rps27rt, Rps3, Rrm2, Rsl1d1, Rsrc2, Runx1t1, Sarnp, Sdc4, Sdha, Selenoh, Selenok, Selenow, Slc24a5, Slc25a5, Slc39a1, Smc3, Snrpb, Snrpd1, Snrpg, Snrpn, Sparc, Spc25, Srrm2, Ssb, Stra8, Sumo2, Supt16, Swi5, Taldo1, Tcea1, Tex30, Thyn1, Tipin, Tkt, Tmem243, Tmem256, Tmem5, Tmem59, Tmsb4x, Tpi1, Trappc3, Tsen34, Tuba1a, Tuba1b, Tuba3a, Tubb5, Txnrd1, Ube2b, Ufd1, Uqcrb, Utf1, Vim, Ywhaz, Zmat2, Zwint. Download a pdf of this figure here.

**Figure 20B:** Dotplot showing gene category Dotplot_Leptone_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: Rpgrip1, Actb, Tmsb4x, Slc24a5, Pim1, Vim, Phlda2, Stra8, Tubb5, Cbx3, Pfn1, Lgals1, Rhox5, Slc39a1, Gng13, Hbb-y, Sparc, Lgals7, Hmgb1, Rbm15, Emd, Insig1, Tuba1b, Ptges3, Cdkn1c, Rhox9, Cdc20, Ifitm2, Tmem256, Arf4, Tex30, Runx1t1, Tcea1, Rhox6, Eef1g, Lig1, Rps27rt, Trappc3, Alkbh5, Hprt, Jun, Tuba1a, Etfb, Mif, Zmat2, Pdcd10, Hist1h1b, Hmgb2, Sumo2, Nusap1, Hnrnpk, Mdk, Rpl24, Marcksl1, Ino80e, Ptov1, Fam60a, Utf1, Atpif1, Arpc4, Lars, Dbi, Ube2b, Mcmbp, 1110004F10Rik, Ptma, Med21, Ifitm1, Tuba3a, Btg1, Fis1, Pgam1, Cct4, Smc3, Rpl6, Snrpd1, Hnrnpf, Ppdpf, Mphosph8, Iqsec1, Pou5f1, Srrm2, Anp32e, Myef2, Mrps12, Pa2g4, Rpl27, Cdkn2a, Mrpl54, Esco2, Aldoa, Selenow, Uqcrb, Rsl1d1, Cct7, Fignl1, Nudt21, Dusp9, Cd63, Ssb, Gata2, Dut, Rrm2, Bex2, Ccnb1, Ndufb2, Tmem5, Rpa3, Tsen34, Ccnd1, Ywhaz, Pex13, Thyn1, Fth1, Cdc42, Hmgn1, Selenok, Ndufa13, Dnaja1, Hells, Mrps16, Pdap1, Tkt, Eif4a1, Emc6, Gpx4, Gnb1, Cox6b1, Bst2, Nup85, Txnrd1, Dnajc8, Zwint, Hbb-bt, Pdcd4, Atp5b, Ezh2, Ccnh, Spc25, Eif1ad, Snrpg, Plekhf2, Tpi1, Gsdme, Rsrc2, Selenoh, Cdc26, Supt16, Ufd1, Dstn, Hpf1, Gapdh, Galk1, Ilf2, Hmmr, Mrps36, Hnrnpa1, Snrpn, Polr2f, Rpl39, Kdelr2, Cox20, Abraxas1, Snrpb, Rnf2, Sdc4, Gpi1, Hccs, Pdpn, Grpel1, Cryzl1, Idi1, Taldo1, C1qbp, Slc25a5, Tmem59, Mrpl23, Rps3, Lrrc8a, Cox8a, Hint2, Eif3i, Sdha, Prdx2, Fam32a, Tmem243, Nmb, Cnpy2, Sarnp, Dppa4, Gmps, Swi5, Alyref, Nenf, Ldha, Ckb, Hif1a, Echs1, Tipin. Download a pdf of this figure [here](report_figures/Dotplot_Leptone_Niu_et_at_2020.V20201005.pdf).

Figure 20B: Dotplot showing gene category Dotplot_Leptone_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: Rpgrip1, Actb, Tmsb4x, Slc24a5, Pim1, Vim, Phlda2, Stra8, Tubb5, Cbx3, Pfn1, Lgals1, Rhox5, Slc39a1, Gng13, Hbb-y, Sparc, Lgals7, Hmgb1, Rbm15, Emd, Insig1, Tuba1b, Ptges3, Cdkn1c, Rhox9, Cdc20, Ifitm2, Tmem256, Arf4, Tex30, Runx1t1, Tcea1, Rhox6, Eef1g, Lig1, Rps27rt, Trappc3, Alkbh5, Hprt, Jun, Tuba1a, Etfb, Mif, Zmat2, Pdcd10, Hist1h1b, Hmgb2, Sumo2, Nusap1, Hnrnpk, Mdk, Rpl24, Marcksl1, Ino80e, Ptov1, Fam60a, Utf1, Atpif1, Arpc4, Lars, Dbi, Ube2b, Mcmbp, 1110004F10Rik, Ptma, Med21, Ifitm1, Tuba3a, Btg1, Fis1, Pgam1, Cct4, Smc3, Rpl6, Snrpd1, Hnrnpf, Ppdpf, Mphosph8, Iqsec1, Pou5f1, Srrm2, Anp32e, Myef2, Mrps12, Pa2g4, Rpl27, Cdkn2a, Mrpl54, Esco2, Aldoa, Selenow, Uqcrb, Rsl1d1, Cct7, Fignl1, Nudt21, Dusp9, Cd63, Ssb, Gata2, Dut, Rrm2, Bex2, Ccnb1, Ndufb2, Tmem5, Rpa3, Tsen34, Ccnd1, Ywhaz, Pex13, Thyn1, Fth1, Cdc42, Hmgn1, Selenok, Ndufa13, Dnaja1, Hells, Mrps16, Pdap1, Tkt, Eif4a1, Emc6, Gpx4, Gnb1, Cox6b1, Bst2, Nup85, Txnrd1, Dnajc8, Zwint, Hbb-bt, Pdcd4, Atp5b, Ezh2, Ccnh, Spc25, Eif1ad, Snrpg, Plekhf2, Tpi1, Gsdme, Rsrc2, Selenoh, Cdc26, Supt16, Ufd1, Dstn, Hpf1, Gapdh, Galk1, Ilf2, Hmmr, Mrps36, Hnrnpa1, Snrpn, Polr2f, Rpl39, Kdelr2, Cox20, Abraxas1, Snrpb, Rnf2, Sdc4, Gpi1, Hccs, Pdpn, Grpel1, Cryzl1, Idi1, Taldo1, C1qbp, Slc25a5, Tmem59, Mrpl23, Rps3, Lrrc8a, Cox8a, Hint2, Eif3i, Sdha, Prdx2, Fam32a, Tmem243, Nmb, Cnpy2, Sarnp, Dppa4, Gmps, Swi5, Alyref, Nenf, Ldha, Ckb, Hif1a, Echs1, Tipin. Download a pdf of this figure here.

Category Feature Plot Zygotene_Niu_et_at_2020

**Figure 21A:** Category Scatter showing gene category Zygotene_Niu_et_at_2020.  The following genes of this dataset are represented in this figure: 1700013H16Rik, 2310061I04Rik, Abhd18, Acer3, Adarb1, Anapc13, Arpc1a, Asf1b, Atox1, Atp6v1g1, Atpif1, Baz1a, Btbd10, Bud31, Ccdc25, Ccne2, Cdip1, Cdkn2a, Cdkn2d, Cenpw, Chchd6, Ciao1, Cinp, Cited1, Commd1, Cox5a, Cs, Csnk2b, Ctnnbip1, Cystm1, Dazl, Dbi, Dcps, Ddb2, Ddx10, Dmc1, Dmrtc2, Dnajb11, Eaf2, Ebpl, Ecsit, Eif1ad, Eif2s2, Eif3i, Emc6, Emc7, Emc8, Emd, Endog, Esco2, Esf1, Fis1, Fmr1nb, Gpat2, H2-Ab1, H2-D1, H2-K1, Haus8, Hccs, Hdgfl2, Higd1a, Hint1, Hmgb1, Hmox2, Hnrnpm, Hprt, Hsf2bp, Hsp90b1, Hspb11, Id3, Inca1, Iqcb1, Iqsec1, Jade3, Kcnq1ot1, Kdm5a, Kif23, Krcc1, Lamtor1, Lamtor2, Lars, Lig1, Lrrc42, Lsm8, Luzp1, Lypd4, M1ap, Macrod2, Madd, Matr3, Mau2, Med21, Minos1, Mlf1, Mpc1, Mphosph8, Mrpl17, Mrpl35, Mtch2, Ndufa2, Ndufc2, Ndufs5, Nmb, Nmt2, Nrbp1, Oaz2, Ost4, Pbdc1, Pbx3, Pdap1, Pde6d, Pgm2l1, Pigf, Pigp, Pip5k1a, Prdm9, Prpf19, Psmb4, Psmd1, Ptpra, Pura, Rab6a, Rae1, Rbmx2, Rec8, Rex1bd, Rhox7b, Rnaseh2c, Rnf138, Rnf4, Rpp21, Rspry1, Rtn3, Sarnp, Sdhb, Sdhc, Sec11c, Sec22b, Selenok, Serf1, Serf2, Sf3b6, Sgpl1, Sh3bgrl3, Siva1, Slbp, Slc25a17, Slc25a31, Slc38a9, Snu13, Sod2, Spdya, Srp14, Srsf9, Stk38, Stra8, Stx12, Sumo2, Swi5, Swt1, Syce1, Syce2, Synj2bp, Syt14, Taf11, Tbca, Tex101, Tex12, Tex30, Thyn1, Timm10b, Timm17b, Tiparp, Tmed3, Tmem160, Tmem41b, Tmem5, Tmem59, Top2b, Tpgs2, Trpc1, Tsen34, Ttc3, Tuba3a, Txnl1, Uba6, Ube2a, Ube2n, Ube2t, Uqcr10, Uqcr11, Uqcrh, Vamp3, Vdac3, Vps29, Xlr4a, Xlr4b, Xlr4c, Zfp398. Download a pdf of this figure [here](report_figures/CatScatterZygotene_Niu_et_at_2020.V20201005.pdf).

Figure 21A: Category Scatter showing gene category Zygotene_Niu_et_at_2020. The following genes of this dataset are represented in this figure: 1700013H16Rik, 2310061I04Rik, Abhd18, Acer3, Adarb1, Anapc13, Arpc1a, Asf1b, Atox1, Atp6v1g1, Atpif1, Baz1a, Btbd10, Bud31, Ccdc25, Ccne2, Cdip1, Cdkn2a, Cdkn2d, Cenpw, Chchd6, Ciao1, Cinp, Cited1, Commd1, Cox5a, Cs, Csnk2b, Ctnnbip1, Cystm1, Dazl, Dbi, Dcps, Ddb2, Ddx10, Dmc1, Dmrtc2, Dnajb11, Eaf2, Ebpl, Ecsit, Eif1ad, Eif2s2, Eif3i, Emc6, Emc7, Emc8, Emd, Endog, Esco2, Esf1, Fis1, Fmr1nb, Gpat2, H2-Ab1, H2-D1, H2-K1, Haus8, Hccs, Hdgfl2, Higd1a, Hint1, Hmgb1, Hmox2, Hnrnpm, Hprt, Hsf2bp, Hsp90b1, Hspb11, Id3, Inca1, Iqcb1, Iqsec1, Jade3, Kcnq1ot1, Kdm5a, Kif23, Krcc1, Lamtor1, Lamtor2, Lars, Lig1, Lrrc42, Lsm8, Luzp1, Lypd4, M1ap, Macrod2, Madd, Matr3, Mau2, Med21, Minos1, Mlf1, Mpc1, Mphosph8, Mrpl17, Mrpl35, Mtch2, Ndufa2, Ndufc2, Ndufs5, Nmb, Nmt2, Nrbp1, Oaz2, Ost4, Pbdc1, Pbx3, Pdap1, Pde6d, Pgm2l1, Pigf, Pigp, Pip5k1a, Prdm9, Prpf19, Psmb4, Psmd1, Ptpra, Pura, Rab6a, Rae1, Rbmx2, Rec8, Rex1bd, Rhox7b, Rnaseh2c, Rnf138, Rnf4, Rpp21, Rspry1, Rtn3, Sarnp, Sdhb, Sdhc, Sec11c, Sec22b, Selenok, Serf1, Serf2, Sf3b6, Sgpl1, Sh3bgrl3, Siva1, Slbp, Slc25a17, Slc25a31, Slc38a9, Snu13, Sod2, Spdya, Srp14, Srsf9, Stk38, Stra8, Stx12, Sumo2, Swi5, Swt1, Syce1, Syce2, Synj2bp, Syt14, Taf11, Tbca, Tex101, Tex12, Tex30, Thyn1, Timm10b, Timm17b, Tiparp, Tmed3, Tmem160, Tmem41b, Tmem5, Tmem59, Top2b, Tpgs2, Trpc1, Tsen34, Ttc3, Tuba3a, Txnl1, Uba6, Ube2a, Ube2n, Ube2t, Uqcr10, Uqcr11, Uqcrh, Vamp3, Vdac3, Vps29, Xlr4a, Xlr4b, Xlr4c, Zfp398. Download a pdf of this figure here.

**Figure 21B:** Dotplot showing gene category Dotplot_Zygotene_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: Stra8, Tex12, M1ap, Tuba3a, Mphosph8, Selenok, Hprt, Med21, Tex101, Dbi, Haus8, Cited1, Tpgs2, Pde6d, Dcps, Tex30, Lrrc42, Xlr4b, Pigp, Arpc1a, Xlr4c, Ube2t, Xlr4a, H2-K1, Snu13, H2-D1, 1700013H16Rik, Tmem59, Slc25a31, Rex1bd, Asf1b, Dmrtc2, Vps29, Uqcr10, Swi5, Sec22b, Dnajb11, Cdkn2a, Synj2bp, Esco2, Id3, Cenpw, Spdya, Swt1, Acer3, Mrpl17, Rab6a, Atp6v1g1, Emd, Rspry1, Rtn3, Rbmx2, Lamtor2, Srsf9, Fis1, Timm17b, Zfp398, Cinp, Nmt2, Psmb4, Kdm5a, Hccs, Timm10b, Hmox2, Atox1, Endog, Sarnp, Rpp21, Slbp, Hsf2bp, Syce1, Ebpl, Cox5a, Ecsit, Sod2, Pbx3, Rae1, Cs, Ptpra, Kif23, Hint1, Rnf138, Cystm1, Tmed3, Tbca, Sumo2, Thyn1, Mrpl35, Rnaseh2c, Prdm9, Eif1ad, Iqsec1, Uba6, Kcnq1ot1, Mau2, Csnk2b, Dmc1, Syce2, Chchd6, Minos1, Serf2, Lamtor1, Iqcb1, Ccne2, Cdip1, Rec8, Ttc3, Eif2s2, Emc7, Emc6, Ccdc25, Adarb1, Mtch2, Taf11, Sdhb, Uqcrh, Srp14, Vamp3, Nrbp1, Commd1, Slc38a9, Sgpl1, Cdkn2d, Eaf2, Hnrnpm, Krcc1, Prpf19, Tmem41b, Uqcr11, Oaz2, Macrod2, Jade3, Pgm2l1, Luzp1, Bud31, H2-Ab1, Ndufc2, Rnf4, Matr3, Ndufs5, Tmem160, Lsm8, Mpc1, Nmb, Sf3b6, Sh3bgrl3, Hsp90b1, Fmr1nb, Esf1, Mlf1, Ost4, Lig1, Tsen34, Dazl, Stx12, Baz1a, Slc25a17, Ube2n, Atpif1, Hmgb1, Rhox7b, Ube2a, Ddb2, Ddx10, Ndufa2, Inca1, Sdhc, 2310061I04Rik, Serf1, Psmd1, Sec11c, Lypd4, Higd1a, Siva1, Syt14, Tmem5, Pip5k1a, Anapc13, Vdac3, Ctnnbip1, Madd, Trpc1, Pbdc1, Gpat2, Stk38, Pigf, Pura, Abhd18, Lars, Hdgfl2, Btbd10, Txnl1, Tiparp, Hspb11, Ciao1, Pdap1, Top2b, Eif3i, Emc8. Download a pdf of this figure [here](report_figures/Dotplot_Zygotene_Niu_et_at_2020.V20201005.pdf).

Figure 21B: Dotplot showing gene category Dotplot_Zygotene_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: Stra8, Tex12, M1ap, Tuba3a, Mphosph8, Selenok, Hprt, Med21, Tex101, Dbi, Haus8, Cited1, Tpgs2, Pde6d, Dcps, Tex30, Lrrc42, Xlr4b, Pigp, Arpc1a, Xlr4c, Ube2t, Xlr4a, H2-K1, Snu13, H2-D1, 1700013H16Rik, Tmem59, Slc25a31, Rex1bd, Asf1b, Dmrtc2, Vps29, Uqcr10, Swi5, Sec22b, Dnajb11, Cdkn2a, Synj2bp, Esco2, Id3, Cenpw, Spdya, Swt1, Acer3, Mrpl17, Rab6a, Atp6v1g1, Emd, Rspry1, Rtn3, Rbmx2, Lamtor2, Srsf9, Fis1, Timm17b, Zfp398, Cinp, Nmt2, Psmb4, Kdm5a, Hccs, Timm10b, Hmox2, Atox1, Endog, Sarnp, Rpp21, Slbp, Hsf2bp, Syce1, Ebpl, Cox5a, Ecsit, Sod2, Pbx3, Rae1, Cs, Ptpra, Kif23, Hint1, Rnf138, Cystm1, Tmed3, Tbca, Sumo2, Thyn1, Mrpl35, Rnaseh2c, Prdm9, Eif1ad, Iqsec1, Uba6, Kcnq1ot1, Mau2, Csnk2b, Dmc1, Syce2, Chchd6, Minos1, Serf2, Lamtor1, Iqcb1, Ccne2, Cdip1, Rec8, Ttc3, Eif2s2, Emc7, Emc6, Ccdc25, Adarb1, Mtch2, Taf11, Sdhb, Uqcrh, Srp14, Vamp3, Nrbp1, Commd1, Slc38a9, Sgpl1, Cdkn2d, Eaf2, Hnrnpm, Krcc1, Prpf19, Tmem41b, Uqcr11, Oaz2, Macrod2, Jade3, Pgm2l1, Luzp1, Bud31, H2-Ab1, Ndufc2, Rnf4, Matr3, Ndufs5, Tmem160, Lsm8, Mpc1, Nmb, Sf3b6, Sh3bgrl3, Hsp90b1, Fmr1nb, Esf1, Mlf1, Ost4, Lig1, Tsen34, Dazl, Stx12, Baz1a, Slc25a17, Ube2n, Atpif1, Hmgb1, Rhox7b, Ube2a, Ddb2, Ddx10, Ndufa2, Inca1, Sdhc, 2310061I04Rik, Serf1, Psmd1, Sec11c, Lypd4, Higd1a, Siva1, Syt14, Tmem5, Pip5k1a, Anapc13, Vdac3, Ctnnbip1, Madd, Trpc1, Pbdc1, Gpat2, Stk38, Pigf, Pura, Abhd18, Lars, Hdgfl2, Btbd10, Txnl1, Tiparp, Hspb11, Ciao1, Pdap1, Top2b, Eif3i, Emc8. Download a pdf of this figure here.

Category Feature Plot Pachytene_Niu_et_at_2020

**Figure 22A:** Category Scatter showing gene category Pachytene_Niu_et_at_2020.  The following genes of this dataset are represented in this figure: 2410089E03Rik, 4930447C04Rik, 4932438A13Rik, 9930021J03Rik, Agbl3, AI314180, Akap11, Alms1, Ankhd1, Ankrd12, Ankrd31, Arhgap5, Arid4b, Atf7ip2, Atm, Atr, AY036118, Baz1b, BC051142, Birc6, Bod1l, Bptf, Brca2, Btaf1, Btbd7, Bub1, Caprin2, Casp8ap2, Ccdc18, Ccdc88a, Ccnb3, Cdk12, Cdk13, Cdkl2, Cep290, Cep63, Cep83, Ckap5, Cntrl, Copb2, Crebrf, Cul3, D3Ertd751e, Ddx10, Ddx17, Dhx36, Dip2c, Dmxl1, Dnajb4, Dopey1, Dync1h1, Dync2h1, Dzip3, Eea1, Eif4enif1, Ercc6l2, Etnk1, Fam208a, Fam208b, Fmr1, Gls, Gm42418, Golga4, Gpbp1, Grk4, H2-K1, Herc1, Hfm1, Hist1h1e, Hist1h2aa, Hist1h4d, Hook1, Hormad1, Hspa5, Hspb11, Huwe1, Ints6, Iqcb1, Iws1, Jmjd1c, Kcnq1ot1, Kdm2a, Kdm5a, Kdm5b, Kif2a, Kmt2c, Larp1b, Lars2, Luc7l2, Macf1, Mdc1, Mdn1, Meioc, Mki67, mt-Atp8, mt-Co2, mt-Nd2, mt-Nd3, mt-Nd4l, mt-Nd6, Mycbp2, Myo5a, Myo9a, Mysm1, Naa15, Nasp, Nbeal1, Ncapg, Nek1, Nfat5, Nipbl, Nktr, Nol8, Nrd1, Nsd1, Odf2l, Ofd1, Pabpc1, Paxbp1, Pbx3, Pcm1, Phf20l1, Phf21a, Phf7, Phf8, Pigp, Pik3c2a, Polr2a, Prdm9, Qk, Rad50, Rad51ap2, Ranbp2, Rasa1, Rb1cc1, Rbm5, Rif1, Rnf17, Rnf20, Rock1, Rock2, Rpgr, Sacs, Scaper, Sclt1, Sec63, Senp6, Setdb1, Setx, Smarca5, Smc1b, Smc4, Smg1, Snrnp70, Spdya, Specc1l, Sptan1, Stag3, Stk31, Suco, Sycp1, Sycp2, Taf15, Taf7l, Taok1, Tcf20, Terb1, Tex11, Tex15, Tex16, Tfrc, Thoc2, Tmx4, Topbp1, Tpr, Trip11, Trip12, Trmt10a, Trpm7, Tsga10, Ttc3, Unc13b, Usp32, Usp34, Usp47, Usp9x, Uvssa, Wnk1, Zcchc11, Zcchc7, Zcwpw1, Zdbf2, Zfp318, Zfp91, Zhx1, Zkscan3, Zmym6, Zranb1, Zufsp. Download a pdf of this figure [here](report_figures/CatScatterPachytene_Niu_et_at_2020.V20201005.pdf).

Figure 22A: Category Scatter showing gene category Pachytene_Niu_et_at_2020. The following genes of this dataset are represented in this figure: 2410089E03Rik, 4930447C04Rik, 4932438A13Rik, 9930021J03Rik, Agbl3, AI314180, Akap11, Alms1, Ankhd1, Ankrd12, Ankrd31, Arhgap5, Arid4b, Atf7ip2, Atm, Atr, AY036118, Baz1b, BC051142, Birc6, Bod1l, Bptf, Brca2, Btaf1, Btbd7, Bub1, Caprin2, Casp8ap2, Ccdc18, Ccdc88a, Ccnb3, Cdk12, Cdk13, Cdkl2, Cep290, Cep63, Cep83, Ckap5, Cntrl, Copb2, Crebrf, Cul3, D3Ertd751e, Ddx10, Ddx17, Dhx36, Dip2c, Dmxl1, Dnajb4, Dopey1, Dync1h1, Dync2h1, Dzip3, Eea1, Eif4enif1, Ercc6l2, Etnk1, Fam208a, Fam208b, Fmr1, Gls, Gm42418, Golga4, Gpbp1, Grk4, H2-K1, Herc1, Hfm1, Hist1h1e, Hist1h2aa, Hist1h4d, Hook1, Hormad1, Hspa5, Hspb11, Huwe1, Ints6, Iqcb1, Iws1, Jmjd1c, Kcnq1ot1, Kdm2a, Kdm5a, Kdm5b, Kif2a, Kmt2c, Larp1b, Lars2, Luc7l2, Macf1, Mdc1, Mdn1, Meioc, Mki67, mt-Atp8, mt-Co2, mt-Nd2, mt-Nd3, mt-Nd4l, mt-Nd6, Mycbp2, Myo5a, Myo9a, Mysm1, Naa15, Nasp, Nbeal1, Ncapg, Nek1, Nfat5, Nipbl, Nktr, Nol8, Nrd1, Nsd1, Odf2l, Ofd1, Pabpc1, Paxbp1, Pbx3, Pcm1, Phf20l1, Phf21a, Phf7, Phf8, Pigp, Pik3c2a, Polr2a, Prdm9, Qk, Rad50, Rad51ap2, Ranbp2, Rasa1, Rb1cc1, Rbm5, Rif1, Rnf17, Rnf20, Rock1, Rock2, Rpgr, Sacs, Scaper, Sclt1, Sec63, Senp6, Setdb1, Setx, Smarca5, Smc1b, Smc4, Smg1, Snrnp70, Spdya, Specc1l, Sptan1, Stag3, Stk31, Suco, Sycp1, Sycp2, Taf15, Taf7l, Taok1, Tcf20, Terb1, Tex11, Tex15, Tex16, Tfrc, Thoc2, Tmx4, Topbp1, Tpr, Trip11, Trip12, Trmt10a, Trpm7, Tsga10, Ttc3, Unc13b, Usp32, Usp34, Usp47, Usp9x, Uvssa, Wnk1, Zcchc11, Zcchc7, Zcwpw1, Zdbf2, Zfp318, Zfp91, Zhx1, Zkscan3, Zmym6, Zranb1, Zufsp. Download a pdf of this figure here.

**Figure 22B:** Dotplot showing gene category Dotplot_Pachytene_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: 4930447C04Rik, Tex15, Sycp2, Atr, Myo9a, Cep290, Brca2, Gm42418, Fmr1, 4932438A13Rik, Caprin2, Meioc, Hist1h2aa, Ankrd31, Hist1h1e, Rb1cc1, AY036118, Sycp1, Hspa5, Luc7l2, Hfm1, Nipbl, Arhgap5, Etnk1, Topbp1, Smc1b, Macf1, Ccdc88a, Stag3, Nfat5, H2-K1, Ccnb3, Zfp318, Setx, Smarca5, Wnk1, Zhx1, Rock1, Rad51ap2, Zcwpw1, Gls, Zcchc11, Tex16, Prdm9, Tsga10, Fam208b, mt-Atp8, Huwe1, 2410089E03Rik, Pcm1, Ckap5, Zkscan3, Herc1, Taf15, mt-Nd4l, Uvssa, Mki67, Ccdc18, Dzip3, Ttc3, Smc4, 9930021J03Rik, Unc13b, Pik3c2a, Bptf, Terb1, Ankhd1, Rock2, Dopey1, Spdya, Zdbf2, Hspb11, Snrnp70, Taok1, Thoc2, Zmym6, Tpr, mt-Nd6, Kmt2c, Alms1, Scaper, Cul3, mt-Nd2, Dhx36, Bod1l, Usp32, Ints6, Trip12, Fam208a, Nrd1, Rnf17, Cntrl, Polr2a, Btbd7, Lars2, Ankrd12, Golga4, Smg1, Hist1h4d, D3Ertd751e, Usp9x, Sclt1, Ddx10, Atf7ip2, Mycbp2, Nol8, Tfrc, AI314180, Kdm5a, Tmx4, Trip11, Usp34, Eea1, Sec63, Zfp91, Larp1b, Trmt10a, Mysm1, Cdk13, Nktr, Gpbp1, Cdkl2, Atm, Suco, Odf2l, Dync1h1, Btaf1, Rif1, Ddx17, Pabpc1, Cep63, Jmjd1c, Copb2, Nbeal1, Crebrf, Qk, Mdn1, Nsd1, Trpm7, Ofd1, Cdk12, Rpgr, Grk4, mt-Co2, Kif2a, Cep83, Zufsp, Phf8, Hormad1, Naa15, Kdm2a, Dmxl1, Ercc6l2, Ncapg, Sacs, Dip2c, Usp47, Myo5a, Hook1, Phf7, Kdm5b, Bub1, Setdb1, Phf20l1, Casp8ap2, Akap11, Stk31, Mdc1, Zcchc7, Kcnq1ot1, Rbm5, Phf21a, mt-Nd3, Paxbp1, Iws1, Nek1, Sptan1, Tcf20, Iqcb1, Nasp, Dync2h1, Pigp, BC051142, Zranb1, Senp6, Tex11, Taf7l, Dnajb4, Rnf20, Specc1l, Birc6, Eif4enif1, Rasa1, Arid4b, Pbx3, Ranbp2, Agbl3, Rad50, Baz1b. Download a pdf of this figure [here](report_figures/Dotplot_Pachytene_Niu_et_at_2020.V20201005.pdf).

Figure 22B: Dotplot showing gene category Dotplot_Pachytene_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: 4930447C04Rik, Tex15, Sycp2, Atr, Myo9a, Cep290, Brca2, Gm42418, Fmr1, 4932438A13Rik, Caprin2, Meioc, Hist1h2aa, Ankrd31, Hist1h1e, Rb1cc1, AY036118, Sycp1, Hspa5, Luc7l2, Hfm1, Nipbl, Arhgap5, Etnk1, Topbp1, Smc1b, Macf1, Ccdc88a, Stag3, Nfat5, H2-K1, Ccnb3, Zfp318, Setx, Smarca5, Wnk1, Zhx1, Rock1, Rad51ap2, Zcwpw1, Gls, Zcchc11, Tex16, Prdm9, Tsga10, Fam208b, mt-Atp8, Huwe1, 2410089E03Rik, Pcm1, Ckap5, Zkscan3, Herc1, Taf15, mt-Nd4l, Uvssa, Mki67, Ccdc18, Dzip3, Ttc3, Smc4, 9930021J03Rik, Unc13b, Pik3c2a, Bptf, Terb1, Ankhd1, Rock2, Dopey1, Spdya, Zdbf2, Hspb11, Snrnp70, Taok1, Thoc2, Zmym6, Tpr, mt-Nd6, Kmt2c, Alms1, Scaper, Cul3, mt-Nd2, Dhx36, Bod1l, Usp32, Ints6, Trip12, Fam208a, Nrd1, Rnf17, Cntrl, Polr2a, Btbd7, Lars2, Ankrd12, Golga4, Smg1, Hist1h4d, D3Ertd751e, Usp9x, Sclt1, Ddx10, Atf7ip2, Mycbp2, Nol8, Tfrc, AI314180, Kdm5a, Tmx4, Trip11, Usp34, Eea1, Sec63, Zfp91, Larp1b, Trmt10a, Mysm1, Cdk13, Nktr, Gpbp1, Cdkl2, Atm, Suco, Odf2l, Dync1h1, Btaf1, Rif1, Ddx17, Pabpc1, Cep63, Jmjd1c, Copb2, Nbeal1, Crebrf, Qk, Mdn1, Nsd1, Trpm7, Ofd1, Cdk12, Rpgr, Grk4, mt-Co2, Kif2a, Cep83, Zufsp, Phf8, Hormad1, Naa15, Kdm2a, Dmxl1, Ercc6l2, Ncapg, Sacs, Dip2c, Usp47, Myo5a, Hook1, Phf7, Kdm5b, Bub1, Setdb1, Phf20l1, Casp8ap2, Akap11, Stk31, Mdc1, Zcchc7, Kcnq1ot1, Rbm5, Phf21a, mt-Nd3, Paxbp1, Iws1, Nek1, Sptan1, Tcf20, Iqcb1, Nasp, Dync2h1, Pigp, BC051142, Zranb1, Senp6, Tex11, Taf7l, Dnajb4, Rnf20, Specc1l, Birc6, Eif4enif1, Rasa1, Arid4b, Pbx3, Ranbp2, Agbl3, Rad50, Baz1b. Download a pdf of this figure here.

Category Feature Plot Diplotene_Niu_et_at_2020

**Figure 23A:** Category Scatter showing gene category Diplotene_Niu_et_at_2020.  The following genes of this dataset are represented in this figure: 1700018B24Rik, 4930447C04Rik, 4932435O22Rik, 4933416C03Rik, 8030474K03Rik, Ablim1, AC160336.1, Actg1, Aen, Agbl3, Ajuba, Alg1, Asic5, Asxl1, Asz1, Atp5f1, Atp6v0b, Atp6v0e, Atp6v1c1, Atp6v1d, Bloc1s1, Brd2, Calr, Carnmt1, Ccdc136, Ccdc152, Ccdc59, Ccdc73, Ccnb3, Cetn4, Cfap36, Chd2, Chic2, Cirbp, Clip4, Cluap1, Commd3, Coq8a, Cpeb1, Crbn, Ddx25, Ddx5, Dgkh, Dld, Dlk1, Dnajc1, Dpm1, Dync2li1, Eea1, Efcab2, Eif1b, Eif4a2, Eif4g3, Ergic3, Erlec1, Fam216a, Fam229b, Fbxo7, Figla, Fmr1nb, Ftl1, Gab3, Gc, Gclm, Ginm1, Gm11808, Gm11837, Gm11985, Gm13269, Gm1673, Gm27164, Gm364, Gm44601, Gm47283, Gm4779, Gm47802, Gm49368, Gm5617, Gm773, Gnpda2, Grid2, Gtf2a2, Gtf3c2, H1f0, H1fx, Hcfc2, Hes1, Id1, Id2, Ift22, Ints6, Isg20, Josd2, Kifap3, Krit1, Lamtor3, Lamtor5, Las1l, Lhx8, Lman2l, Ly6k, Mael, Mageb4, Malat1, Maoa, Mat2a, Med30, Morn2, Morn5, Mov10l1, Mrps24, Naa20, Nbdy, Ndufa1, Nkap, Nmnat3, Nphp1, Nup62cl, Ociad1, Odc1, Ola1, Olfr678, Pabpc1, Pcgf5, Pcmt1, Pdia6, Pet100, Pet2, Pfn2, Piwil2, Pno1, Pnrc1, Podxl2, Pogk, Ppp1r11, Prdm16, Prdx4, Prps1, Psmg4, Reg2, Rhox1, Rnf212, Romo1, Rpl22l1, Rpl35, Rpl37a, Rps20, Rps27, Rps28, Rps29, Rps7, Rsrp1, Sap18, Slc20a1, Smc4, Smim19, Smim8, Sohlh1, Spata33, Spcs1, Speer4a, Spint2, Spo11, Srpk2, Ssbp2, Ssr2, Ssr4, Svbp, Syce3, Sycp1, Sycp3, Taf7l, Taf9b, Tbl1x, Tdrd9, Tet3, Tex264, Tktl1, Tm2d3, Tmbim4, Tmc2, Tnrc6a, Tob1, Topaz1, Trappc2l, Tsc22d1, Tsc22d3, Txndc16, Uba52, Ubn1, Ugp2, Unc50, Usp26, Wdpcp, Wdr26, Ybx2, Ypel3, Zfp560, Zfp612. Download a pdf of this figure [here](report_figures/CatScatterDiplotene_Niu_et_at_2020.V20201005.pdf).

Figure 23A: Category Scatter showing gene category Diplotene_Niu_et_at_2020. The following genes of this dataset are represented in this figure: 1700018B24Rik, 4930447C04Rik, 4932435O22Rik, 4933416C03Rik, 8030474K03Rik, Ablim1, AC160336.1, Actg1, Aen, Agbl3, Ajuba, Alg1, Asic5, Asxl1, Asz1, Atp5f1, Atp6v0b, Atp6v0e, Atp6v1c1, Atp6v1d, Bloc1s1, Brd2, Calr, Carnmt1, Ccdc136, Ccdc152, Ccdc59, Ccdc73, Ccnb3, Cetn4, Cfap36, Chd2, Chic2, Cirbp, Clip4, Cluap1, Commd3, Coq8a, Cpeb1, Crbn, Ddx25, Ddx5, Dgkh, Dld, Dlk1, Dnajc1, Dpm1, Dync2li1, Eea1, Efcab2, Eif1b, Eif4a2, Eif4g3, Ergic3, Erlec1, Fam216a, Fam229b, Fbxo7, Figla, Fmr1nb, Ftl1, Gab3, Gc, Gclm, Ginm1, Gm11808, Gm11837, Gm11985, Gm13269, Gm1673, Gm27164, Gm364, Gm44601, Gm47283, Gm4779, Gm47802, Gm49368, Gm5617, Gm773, Gnpda2, Grid2, Gtf2a2, Gtf3c2, H1f0, H1fx, Hcfc2, Hes1, Id1, Id2, Ift22, Ints6, Isg20, Josd2, Kifap3, Krit1, Lamtor3, Lamtor5, Las1l, Lhx8, Lman2l, Ly6k, Mael, Mageb4, Malat1, Maoa, Mat2a, Med30, Morn2, Morn5, Mov10l1, Mrps24, Naa20, Nbdy, Ndufa1, Nkap, Nmnat3, Nphp1, Nup62cl, Ociad1, Odc1, Ola1, Olfr678, Pabpc1, Pcgf5, Pcmt1, Pdia6, Pet100, Pet2, Pfn2, Piwil2, Pno1, Pnrc1, Podxl2, Pogk, Ppp1r11, Prdm16, Prdx4, Prps1, Psmg4, Reg2, Rhox1, Rnf212, Romo1, Rpl22l1, Rpl35, Rpl37a, Rps20, Rps27, Rps28, Rps29, Rps7, Rsrp1, Sap18, Slc20a1, Smc4, Smim19, Smim8, Sohlh1, Spata33, Spcs1, Speer4a, Spint2, Spo11, Srpk2, Ssbp2, Ssr2, Ssr4, Svbp, Syce3, Sycp1, Sycp3, Taf7l, Taf9b, Tbl1x, Tdrd9, Tet3, Tex264, Tktl1, Tm2d3, Tmbim4, Tmc2, Tnrc6a, Tob1, Topaz1, Trappc2l, Tsc22d1, Tsc22d3, Txndc16, Uba52, Ubn1, Ugp2, Unc50, Usp26, Wdpcp, Wdr26, Ybx2, Ypel3, Zfp560, Zfp612. Download a pdf of this figure here.

**Figure 23B:** Dotplot showing gene category Dotplot_Diplotene_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: Id1, Gm27164, Uba52, AC160336.1, Syce3, Malat1, Pet2, Gm13269, Mael, Brd2, Aen, Ablim1, Gm44601, Ndufa1, Maoa, Spint2, Gm11808, Figla, Eif4a2, Grid2, Gm11985, Gm47283, Sycp1, 1700018B24Rik, Fam229b, Taf7l, Gc, Rps20, Reg2, Tdrd9, Rsrp1, Morn5, Gab3, Josd2, Ergic3, Ccdc73, Id2, Nmnat3, Atp6v0b, Rnf212, Ppp1r11, Gm1673, Fam216a, Rps28, Tnrc6a, Olfr678, Efcab2, Wdpcp, Atp6v1d, Eif4g3, Mat2a, 4932435O22Rik, Gm49368, Tob1, Lman2l, Ccnb3, Dpm1, Pfn2, Dld, Fmr1nb, Usp26, Tktl1, H1f0, Bloc1s1, Ubn1, Gm364, Romo1, Ssbp2, Ccdc152, Ssr2, 8030474K03Rik, Lamtor5, Topaz1, Erlec1, Rps27, 4930447C04Rik, Eea1, Smc4, Cetn4, Sohlh1, Eif1b, Trappc2l, Pnrc1, Slc20a1, 4933416C03Rik, Gm4779, Ajuba, Lhx8, Zfp612, Tsc22d1, Odc1, Prdx4, Chd2, Atp5f1, Ly6k, Smim8, Mov10l1, Wdr26, Piwil2, Ypel3, Morn2, Ssr4, Asxl1, Rpl37a, Speer4a, Spata33, Prdm16, Pet100, Prps1, Kifap3, Tmbim4, Mageb4, Spcs1, Alg1, Smim19, Tsc22d3, Cpeb1, Ybx2, Crbn, Sycp3, Unc50, Naa20, Tex264, Ccdc59, Ddx5, H1fx, Dync2li1, Dgkh, Zfp560, Pogk, Tm2d3, Cirbp, Gclm, Dlk1, Gm47802, Clip4, Asz1, Taf9b, Rpl35, Fbxo7, Cfap36, Asic5, Ginm1, Hcfc2, Rhox1, Rps29, Chic2, Mrps24, Gtf2a2, Tbl1x, Tmc2, Ift22, Agbl3, Tet3, Psmg4, Ints6, Rps7, Gm773, Ugp2, Med30, Nup62cl, Ociad1, Actg1, Las1l, Rpl22l1, Ddx25, Sap18, Pcgf5, Nkap, Ftl1, Svbp, Ccdc136, Nbdy, Hes1, Atp6v1c1, Cluap1, Commd3, Txndc16, Pno1, Gnpda2, Isg20, Nphp1, Lamtor3, Pdia6, Gm5617, Podxl2, Krit1, Spo11, Coq8a, Pabpc1, Pcmt1, Ola1, Carnmt1, Atp6v0e, Srpk2, Gm11837, Dnajc1, Calr, Gtf3c2. Download a pdf of this figure [here](report_figures/Dotplot_Diplotene_Niu_et_at_2020.V20201005.pdf).

Figure 23B: Dotplot showing gene category Dotplot_Diplotene_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: Id1, Gm27164, Uba52, AC160336.1, Syce3, Malat1, Pet2, Gm13269, Mael, Brd2, Aen, Ablim1, Gm44601, Ndufa1, Maoa, Spint2, Gm11808, Figla, Eif4a2, Grid2, Gm11985, Gm47283, Sycp1, 1700018B24Rik, Fam229b, Taf7l, Gc, Rps20, Reg2, Tdrd9, Rsrp1, Morn5, Gab3, Josd2, Ergic3, Ccdc73, Id2, Nmnat3, Atp6v0b, Rnf212, Ppp1r11, Gm1673, Fam216a, Rps28, Tnrc6a, Olfr678, Efcab2, Wdpcp, Atp6v1d, Eif4g3, Mat2a, 4932435O22Rik, Gm49368, Tob1, Lman2l, Ccnb3, Dpm1, Pfn2, Dld, Fmr1nb, Usp26, Tktl1, H1f0, Bloc1s1, Ubn1, Gm364, Romo1, Ssbp2, Ccdc152, Ssr2, 8030474K03Rik, Lamtor5, Topaz1, Erlec1, Rps27, 4930447C04Rik, Eea1, Smc4, Cetn4, Sohlh1, Eif1b, Trappc2l, Pnrc1, Slc20a1, 4933416C03Rik, Gm4779, Ajuba, Lhx8, Zfp612, Tsc22d1, Odc1, Prdx4, Chd2, Atp5f1, Ly6k, Smim8, Mov10l1, Wdr26, Piwil2, Ypel3, Morn2, Ssr4, Asxl1, Rpl37a, Speer4a, Spata33, Prdm16, Pet100, Prps1, Kifap3, Tmbim4, Mageb4, Spcs1, Alg1, Smim19, Tsc22d3, Cpeb1, Ybx2, Crbn, Sycp3, Unc50, Naa20, Tex264, Ccdc59, Ddx5, H1fx, Dync2li1, Dgkh, Zfp560, Pogk, Tm2d3, Cirbp, Gclm, Dlk1, Gm47802, Clip4, Asz1, Taf9b, Rpl35, Fbxo7, Cfap36, Asic5, Ginm1, Hcfc2, Rhox1, Rps29, Chic2, Mrps24, Gtf2a2, Tbl1x, Tmc2, Ift22, Agbl3, Tet3, Psmg4, Ints6, Rps7, Gm773, Ugp2, Med30, Nup62cl, Ociad1, Actg1, Las1l, Rpl22l1, Ddx25, Sap18, Pcgf5, Nkap, Ftl1, Svbp, Ccdc136, Nbdy, Hes1, Atp6v1c1, Cluap1, Commd3, Txndc16, Pno1, Gnpda2, Isg20, Nphp1, Lamtor3, Pdia6, Gm5617, Podxl2, Krit1, Spo11, Coq8a, Pabpc1, Pcmt1, Ola1, Carnmt1, Atp6v0e, Srpk2, Gm11837, Dnajc1, Calr, Gtf3c2. Download a pdf of this figure here.

Category Feature Plot Dictyate_Niu_et_at_2020

**Figure 24A:** Category Scatter showing gene category Dictyate_Niu_et_at_2020.  The following genes of this dataset are represented in this figure: 0610012G03Rik, 4921524J17Rik, 4930562C15Rik, Abi3bp, Acat1, Adss, Aen, Afdn, Ak2, Akap17b, Akr1b3, Aldh9a1, Alg13, Anxa7, Aptx, Arhgap10, Arhgap20os, Arl2, Armc1, Arpc5l, Atp6v0e, Birc5, Bpgm, Bri3, Bud31, Calm1, Ccdc115, Ccdc186, Ccno, Cd164l2, Cd55, Cdv3, Cenpx, Cfl1, Cirbp, Ckb, Cks2, Cnpy1, Commd3, Cox4i2, Crlf1, Csde1, Csnk1e, D10Wsu102e, D8Ertd738e, Dad1, Ddx6, Dicer1, Dlgap4, Dnajc15, Dpf3, Dppa3, Drap1, Drosha, Dusp12, E2f5, E330017A01Rik, Eea1, Eif1, Eif1ax, Eif1b, Eif4ebp1, Eif6, Elavl2, Ergic3, Esrp1, Fam199x, Fam220a, Figla, Foxo3, G3bp2, Gdf9, Gdpd1, Gm11985, Gm15389, Gm47432, Gm47448, Gm47918, Gsta4, Gtsf1, H1foo, Hexdc, Hjurp, Hsbp1, Ift27, Ift43, Iqca, Itpr1, Kdm1b, Kit, Ktn1, Ldhb, Lhx8, Lsm14b, Mageb4, Mat2b, Mbl2, Mea1, Med28, Mgst3, Mien1, Mlf2, Mphosph6, Mrnip, Mrpl32, Mtmr14, Mtus1, Mvp, Ndufaf4, Neb, Necap2, Nexn, Ninj1, Nlrp14, Nlrp4f, Nlrp5, Nobox, Nos1, Npm2, Nucks1, Oas1c, Ooep, Pabpc4, Padi6, Pard3, Parl, Pcsk1n, Pdzd2, Pdzk1, Pfdn4, Phgdh, Pkd2l2, Polr2e, Polr2j, Polr3c, Pop5, Ppfibp2, Pramef12, Psenen, Ptp4a2, Ptpn18, Rab3d, Ralbp1, Rbakdn, Rdx, Reep5, Rfk, Rnaset2a, Rnaset2b, Rnf8, Rpp21, Scand1, Sec11c, Setd4, Sfr1, Sipa1, Sod1, Sohlh1, Spats2, Spc24, Srp9, Sssca1, Stat3, Sumo3, Syne2, Tcl1, Tdrd1, Tesc, Tle6, Tma7, Tmem108, Tmem14c, Tmsb10, Tsga8, Tspan13, Ttn, Tuba1c, Tubb2b, Txndc9, Ube2c, Uchl1, Uhrf1, Unc13c, Uqcc2, Uqcrq, Usp2, Usp7, Vkorc1, Vps72, Wasf2, Wwc2, Xdh, Ybx2, Ypel5, Zbed3, Zfp57, Zfp787, Zp2, Zp3. Download a pdf of this figure [here](report_figures/CatScatterDictyate_Niu_et_at_2020.V20201005.pdf).

Figure 24A: Category Scatter showing gene category Dictyate_Niu_et_at_2020. The following genes of this dataset are represented in this figure: 0610012G03Rik, 4921524J17Rik, 4930562C15Rik, Abi3bp, Acat1, Adss, Aen, Afdn, Ak2, Akap17b, Akr1b3, Aldh9a1, Alg13, Anxa7, Aptx, Arhgap10, Arhgap20os, Arl2, Armc1, Arpc5l, Atp6v0e, Birc5, Bpgm, Bri3, Bud31, Calm1, Ccdc115, Ccdc186, Ccno, Cd164l2, Cd55, Cdv3, Cenpx, Cfl1, Cirbp, Ckb, Cks2, Cnpy1, Commd3, Cox4i2, Crlf1, Csde1, Csnk1e, D10Wsu102e, D8Ertd738e, Dad1, Ddx6, Dicer1, Dlgap4, Dnajc15, Dpf3, Dppa3, Drap1, Drosha, Dusp12, E2f5, E330017A01Rik, Eea1, Eif1, Eif1ax, Eif1b, Eif4ebp1, Eif6, Elavl2, Ergic3, Esrp1, Fam199x, Fam220a, Figla, Foxo3, G3bp2, Gdf9, Gdpd1, Gm11985, Gm15389, Gm47432, Gm47448, Gm47918, Gsta4, Gtsf1, H1foo, Hexdc, Hjurp, Hsbp1, Ift27, Ift43, Iqca, Itpr1, Kdm1b, Kit, Ktn1, Ldhb, Lhx8, Lsm14b, Mageb4, Mat2b, Mbl2, Mea1, Med28, Mgst3, Mien1, Mlf2, Mphosph6, Mrnip, Mrpl32, Mtmr14, Mtus1, Mvp, Ndufaf4, Neb, Necap2, Nexn, Ninj1, Nlrp14, Nlrp4f, Nlrp5, Nobox, Nos1, Npm2, Nucks1, Oas1c, Ooep, Pabpc4, Padi6, Pard3, Parl, Pcsk1n, Pdzd2, Pdzk1, Pfdn4, Phgdh, Pkd2l2, Polr2e, Polr2j, Polr3c, Pop5, Ppfibp2, Pramef12, Psenen, Ptp4a2, Ptpn18, Rab3d, Ralbp1, Rbakdn, Rdx, Reep5, Rfk, Rnaset2a, Rnaset2b, Rnf8, Rpp21, Scand1, Sec11c, Setd4, Sfr1, Sipa1, Sod1, Sohlh1, Spats2, Spc24, Srp9, Sssca1, Stat3, Sumo3, Syne2, Tcl1, Tdrd1, Tesc, Tle6, Tma7, Tmem108, Tmem14c, Tmsb10, Tsga8, Tspan13, Ttn, Tuba1c, Tubb2b, Txndc9, Ube2c, Uchl1, Uhrf1, Unc13c, Uqcc2, Uqcrq, Usp2, Usp7, Vkorc1, Vps72, Wasf2, Wwc2, Xdh, Ybx2, Ypel5, Zbed3, Zfp57, Zfp787, Zp2, Zp3. Download a pdf of this figure here.

**Figure 24B:** Dotplot showing gene category Dotplot_Dictyate_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: Uchl1, Gdpd1, Ooep, Ldhb, Gm15389, Gtsf1, Padi6, Dppa3, G3bp2, D10Wsu102e, Tmsb10, Tmem14c, Figla, Ttn, Sohlh1, Zp3, Tle6, Zbed3, Reep5, Rdx, Cd164l2, Vkorc1, Xdh, Acat1, Ift27, Lsm14b, Ccno, Parl, Anxa7, Arpc5l, Zp2, Setd4, Stat3, Mtmr14, Crlf1, Mat2b, Tuba1c, Ybx2, Lhx8, Ninj1, Nexn, Abi3bp, Ptpn18, Neb, Kdm1b, 0610012G03Rik, Tcl1, Bpgm, Pkd2l2, Dicer1, Dlgap4, Kit, 4921524J17Rik, Nlrp5, Sfr1, Nobox, Cirbp, Drap1, Gm11985, Gsta4, Akap17b, Ube2c, Tdrd1, Gm47448, Mphosph6, Nlrp4f, Rnaset2a, Mvp, Gdf9, Dusp12, Ckb, Ralbp1, Wwc2, Birc5, Spc24, Mbl2, Elavl2, Npm2, Calm1, Pramef12, Ppfibp2, Ift43, Sumo3, Psenen, Cks2, Esrp1, Uqcrq, Pabpc4, H1foo, Tma7, Tmem108, 4930562C15Rik, Alg13, Cd55, Cfl1, E2f5, Rnaset2b, Syne2, Txndc9, Scand1, Ktn1, Afdn, Sec11c, Wasf2, Cox4i2, Csnk1e, Eif1ax, E330017A01Rik, Rbakdn, Polr2j, Bri3, Mlf2, Arhgap10, Mgst3, Dad1, Tubb2b, Mageb4, Aldh9a1, Cenpx, Foxo3, Dnajc15, Nos1, Phgdh, Iqca, Hexdc, Itpr1, Mea1, Dpf3, Zfp787, Sssca1, Eea1, Tspan13, Akr1b3, Hsbp1, Arhgap20os, Rpp21, Hjurp, Unc13c, Cnpy1, Pop5, Tsga8, Necap2, Med28, D8Ertd738e, Ccdc186, Nlrp14, Sod1, Eif6, Gm47918, Ddx6, Usp2, Eif4ebp1, Cdv3, Ndufaf4, Mien1, Arl2, Srp9, Spats2, Pcsk1n, Atp6v0e, Eif1b, Pdzd2, Eif1, Nucks1, Pard3, Sipa1, Csde1, Mrnip, Fam199x, Rfk, Zfp57, Polr3c, Aen, Armc1, Usp7, Rab3d, Ptp4a2, Uhrf1, Drosha, Uqcc2, Mtus1, Oas1c, Tesc, Ypel5, Vps72, Ccdc115, Adss, Pdzk1, Aptx, Pfdn4, Ak2, Polr2e, Rnf8, Gm47432, Mrpl32, Fam220a, Bud31, Commd3, Ergic3. Download a pdf of this figure [here](report_figures/Dotplot_Dictyate_Niu_et_at_2020.V20201005.pdf).

Figure 24B: Dotplot showing gene category Dotplot_Dictyate_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: Uchl1, Gdpd1, Ooep, Ldhb, Gm15389, Gtsf1, Padi6, Dppa3, G3bp2, D10Wsu102e, Tmsb10, Tmem14c, Figla, Ttn, Sohlh1, Zp3, Tle6, Zbed3, Reep5, Rdx, Cd164l2, Vkorc1, Xdh, Acat1, Ift27, Lsm14b, Ccno, Parl, Anxa7, Arpc5l, Zp2, Setd4, Stat3, Mtmr14, Crlf1, Mat2b, Tuba1c, Ybx2, Lhx8, Ninj1, Nexn, Abi3bp, Ptpn18, Neb, Kdm1b, 0610012G03Rik, Tcl1, Bpgm, Pkd2l2, Dicer1, Dlgap4, Kit, 4921524J17Rik, Nlrp5, Sfr1, Nobox, Cirbp, Drap1, Gm11985, Gsta4, Akap17b, Ube2c, Tdrd1, Gm47448, Mphosph6, Nlrp4f, Rnaset2a, Mvp, Gdf9, Dusp12, Ckb, Ralbp1, Wwc2, Birc5, Spc24, Mbl2, Elavl2, Npm2, Calm1, Pramef12, Ppfibp2, Ift43, Sumo3, Psenen, Cks2, Esrp1, Uqcrq, Pabpc4, H1foo, Tma7, Tmem108, 4930562C15Rik, Alg13, Cd55, Cfl1, E2f5, Rnaset2b, Syne2, Txndc9, Scand1, Ktn1, Afdn, Sec11c, Wasf2, Cox4i2, Csnk1e, Eif1ax, E330017A01Rik, Rbakdn, Polr2j, Bri3, Mlf2, Arhgap10, Mgst3, Dad1, Tubb2b, Mageb4, Aldh9a1, Cenpx, Foxo3, Dnajc15, Nos1, Phgdh, Iqca, Hexdc, Itpr1, Mea1, Dpf3, Zfp787, Sssca1, Eea1, Tspan13, Akr1b3, Hsbp1, Arhgap20os, Rpp21, Hjurp, Unc13c, Cnpy1, Pop5, Tsga8, Necap2, Med28, D8Ertd738e, Ccdc186, Nlrp14, Sod1, Eif6, Gm47918, Ddx6, Usp2, Eif4ebp1, Cdv3, Ndufaf4, Mien1, Arl2, Srp9, Spats2, Pcsk1n, Atp6v0e, Eif1b, Pdzd2, Eif1, Nucks1, Pard3, Sipa1, Csde1, Mrnip, Fam199x, Rfk, Zfp57, Polr3c, Aen, Armc1, Usp7, Rab3d, Ptp4a2, Uhrf1, Drosha, Uqcc2, Mtus1, Oas1c, Tesc, Ypel5, Vps72, Ccdc115, Adss, Pdzk1, Aptx, Pfdn4, Ak2, Polr2e, Rnf8, Gm47432, Mrpl32, Fam220a, Bud31, Commd3, Ergic3. Download a pdf of this figure here.

Category Feature Plot Dying_nurse_cell_Niu_et_at_2020

**Figure 25A:** Category Scatter showing gene category Dying_nurse_cell_Niu_et_at_2020.  The following genes of this dataset are represented in this figure: 1700018B24Rik, 2410015M20Rik, AC160336.1, Acat1, Aen, Aig1, Akap17b, Akr1b3, Aktip, Anp32a, Anxa7, Apoo, Aptx, Arf5, Arl3, Arpc5l, Asnsd1, Asz1, Atp6v0e, Atp6v1d, AY036118, Bcl7a, Bri3, Card19, Cast, Ccdc115, Ccdc152, Ccdc73, Ccnb1ip1, Ccnb2, Ccnb3, Cdk2, Cetn4, Cirbp, Cks2, Copb2, Coprs, Cops5, Cul3, Cyb5r3, Dcps, Ddah1, Dhx40, Dnajb9, Dync1i2, Efcab2, Eif1ax, Eif1b, Eif3c, Eif3e, Eif4a1, Eif4a2, Eif4e, Epb41l2, Eps15, Ergic3, Fam50a, Figla, G3bp2, Gc, Gdpd1, Gid4, Gm11985, Gm17018, Gm26917, Gm27164, Gm42418, Gnpda2, Gpbp1, Gsdme, Gstm2, Gtsf1, H19, Hba-a1, Hba-a2, Hbb-bs, Hbb-bt, Hes1, Hist1h1e, Hormad1, Hpf1, Hsf2, Id1, Idh3a, Ift27, Ift74, Ift88, Ik, Ilf2, Itgb3bp, Josd2, Jun, Kank3, Kdm1b, Larp7, Lars2, Lgals7, Lhx8, Mael, Mageb4, Map1lc3a, Mat2b, Meg3, Mis18a, Morn2, Mrfap1, Mrps9, mt-Atp6, mt-Atp8, mt-Co1, mt-Co2, mt-Co3, mt-Cytb, Mtfr1l, mt-Nd1, mt-Nd2, mt-Nd3, mt-Nd4, mt-Nd4l, mt-Nd5, mt-Nd6, Mycbp, Nabp1, Nae1, Nasp, Ndufa1, Ndufa5, Ninj1, Nup62cl, Osbpl8, Otub1, Paip1, Pet2, Pgam1, Phf7, Pkig, Pnrc1, Ppid, Pptc7, Prps1, Psmd10, Psmg4, Ptcd2, Raph1, Rdx, Reep5, Rnpc3, Rpl29, Rpl39l, Sar1b, Selenow, Serpinb6a, Sf3b2, Snrpb2, Snrpd2, Snx4, Sohlh1, Spa17, Spata33, Stard7, Stip1, Stk25, Stub1, Sumo3, Syce3, Sycp1, Sycp3, Taf7l, Tdrd1, Tex15, Tmem14c, Tmsb10, Tmsb4x, Tpgs2, Trappc2l, Tsacc, Tsc22d1, Tsc22d3, Tuba1a, Tulp4, Uba52, Ube2d3, Ube2g2, Ube2w, Uchl1, Usp7, Vapa, Vkorc1, Vps35, Wdr36, Xist, Xlr, Xrcc5, Yeats4, Zc2hc1a, Zcwpw1, Zfp580, Znhit6, Znrd1as. Download a pdf of this figure [here](report_figures/CatScatterDying_nurse_cell_Niu_et_at_2020.V20201005.pdf).

Figure 25A: Category Scatter showing gene category Dying_nurse_cell_Niu_et_at_2020. The following genes of this dataset are represented in this figure: 1700018B24Rik, 2410015M20Rik, AC160336.1, Acat1, Aen, Aig1, Akap17b, Akr1b3, Aktip, Anp32a, Anxa7, Apoo, Aptx, Arf5, Arl3, Arpc5l, Asnsd1, Asz1, Atp6v0e, Atp6v1d, AY036118, Bcl7a, Bri3, Card19, Cast, Ccdc115, Ccdc152, Ccdc73, Ccnb1ip1, Ccnb2, Ccnb3, Cdk2, Cetn4, Cirbp, Cks2, Copb2, Coprs, Cops5, Cul3, Cyb5r3, Dcps, Ddah1, Dhx40, Dnajb9, Dync1i2, Efcab2, Eif1ax, Eif1b, Eif3c, Eif3e, Eif4a1, Eif4a2, Eif4e, Epb41l2, Eps15, Ergic3, Fam50a, Figla, G3bp2, Gc, Gdpd1, Gid4, Gm11985, Gm17018, Gm26917, Gm27164, Gm42418, Gnpda2, Gpbp1, Gsdme, Gstm2, Gtsf1, H19, Hba-a1, Hba-a2, Hbb-bs, Hbb-bt, Hes1, Hist1h1e, Hormad1, Hpf1, Hsf2, Id1, Idh3a, Ift27, Ift74, Ift88, Ik, Ilf2, Itgb3bp, Josd2, Jun, Kank3, Kdm1b, Larp7, Lars2, Lgals7, Lhx8, Mael, Mageb4, Map1lc3a, Mat2b, Meg3, Mis18a, Morn2, Mrfap1, Mrps9, mt-Atp6, mt-Atp8, mt-Co1, mt-Co2, mt-Co3, mt-Cytb, Mtfr1l, mt-Nd1, mt-Nd2, mt-Nd3, mt-Nd4, mt-Nd4l, mt-Nd5, mt-Nd6, Mycbp, Nabp1, Nae1, Nasp, Ndufa1, Ndufa5, Ninj1, Nup62cl, Osbpl8, Otub1, Paip1, Pet2, Pgam1, Phf7, Pkig, Pnrc1, Ppid, Pptc7, Prps1, Psmd10, Psmg4, Ptcd2, Raph1, Rdx, Reep5, Rnpc3, Rpl29, Rpl39l, Sar1b, Selenow, Serpinb6a, Sf3b2, Snrpb2, Snrpd2, Snx4, Sohlh1, Spa17, Spata33, Stard7, Stip1, Stk25, Stub1, Sumo3, Syce3, Sycp1, Sycp3, Taf7l, Tdrd1, Tex15, Tmem14c, Tmsb10, Tmsb4x, Tpgs2, Trappc2l, Tsacc, Tsc22d1, Tsc22d3, Tuba1a, Tulp4, Uba52, Ube2d3, Ube2g2, Ube2w, Uchl1, Usp7, Vapa, Vkorc1, Vps35, Wdr36, Xist, Xlr, Xrcc5, Yeats4, Zc2hc1a, Zcwpw1, Zfp580, Znhit6, Znrd1as. Download a pdf of this figure here.

**Figure 25B:** Dotplot showing gene category Dotplot_Dying_nurse_cell_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: Hbb-bs, Gm26917, Hba-a1, Hba-a2, Gm42418, mt-Nd1, Hbb-bt, mt-Cytb, mt-Atp6, mt-Co3, mt-Nd5, mt-Nd2, mt-Nd4, AY036118, mt-Co2, Figla, mt-Nd3, mt-Co1, Sohlh1, Sycp3, mt-Nd4l, Uchl1, Gc, mt-Atp8, Ccnb1ip1, Cirbp, Vkorc1, mt-Nd6, Lars2, Lhx8, Tmsb4x, Uba52, 1700018B24Rik, Syce3, Acat1, H19, Hormad1, Mageb4, Cetn4, Ccdc73, Xist, Gm27164, AC160336.1, Taf7l, Hes1, Gm11985, Pnrc1, Pet2, Kdm1b, Sumo3, Gpbp1, Spa17, Eif4a2, Sycp1, Tmem14c, Gdpd1, Akap17b, Dnajb9, Ube2w, Aen, Eif1b, Ccdc152, Reep5, Aptx, Mael, Nup62cl, Pkig, Ift27, Aig1, Ninj1, Ergic3, Gstm2, Vps35, Map1lc3a, Eif4a1, Tsacc, Id1, Cul3, Atp6v1d, Ndufa1, Mat2b, Rpl39l, Tmsb10, Arpc5l, Itgb3bp, Eif1ax, Aktip, Dync1i2, Stub1, Hist1h1e, Kank3, Trappc2l, Idh3a, Znhit6, Dhx40, Pptc7, Znrd1as, Josd2, Psmd10, Gm17018, Cops5, Otub1, Tsc22d3, Apoo, Prps1, Tex15, Wdr36, Xlr, Larp7, Gtsf1, Mrps9, Nae1, Eif4e, Meg3, Jun, Gsdme, Stk25, Rdx, Xrcc5, Serpinb6a, Card19, Snrpb2, Tdrd1, Vapa, Morn2, Gnpda2, Ccdc115, Paip1, Mrfap1, Arf5, Lgals7, Snx4, Anxa7, 2410015M20Rik, Ik, Epb41l2, Ube2d3, Ccnb3, Cdk2, Ift88, Asnsd1, G3bp2, Bri3, Cks2, Efcab2, Eif3e, Spata33, Tulp4, Ddah1, Ift74, Cast, Zcwpw1, Tsc22d1, Akr1b3, Mycbp, Eps15, Bcl7a, Ndufa5, Ube2g2, Psmg4, Atp6v0e, Hsf2, Stard7, Osbpl8, Mtfr1l, Arl3, Gid4, Ppid, Copb2, Phf7, Zfp580, Asz1, Nabp1, Ptcd2, Usp7, Fam50a, Stip1, Rpl29, Nasp, Tuba1a, Ilf2, Tpgs2, Selenow, Anp32a, Rnpc3, Raph1, Coprs, Zc2hc1a, Ccnb2, Dcps, Snrpd2, Eif3c, Hpf1, Mis18a, Cyb5r3, Sar1b, Pgam1, Sf3b2, Yeats4. Download a pdf of this figure [here](report_figures/Dotplot_Dying_nurse_cell_Niu_et_at_2020.V20201005.pdf).

Figure 25B: Dotplot showing gene category Dotplot_Dying_nurse_cell_Niu_et_at_2020. The following genes were found in this category and the single-cell dataset: Hbb-bs, Gm26917, Hba-a1, Hba-a2, Gm42418, mt-Nd1, Hbb-bt, mt-Cytb, mt-Atp6, mt-Co3, mt-Nd5, mt-Nd2, mt-Nd4, AY036118, mt-Co2, Figla, mt-Nd3, mt-Co1, Sohlh1, Sycp3, mt-Nd4l, Uchl1, Gc, mt-Atp8, Ccnb1ip1, Cirbp, Vkorc1, mt-Nd6, Lars2, Lhx8, Tmsb4x, Uba52, 1700018B24Rik, Syce3, Acat1, H19, Hormad1, Mageb4, Cetn4, Ccdc73, Xist, Gm27164, AC160336.1, Taf7l, Hes1, Gm11985, Pnrc1, Pet2, Kdm1b, Sumo3, Gpbp1, Spa17, Eif4a2, Sycp1, Tmem14c, Gdpd1, Akap17b, Dnajb9, Ube2w, Aen, Eif1b, Ccdc152, Reep5, Aptx, Mael, Nup62cl, Pkig, Ift27, Aig1, Ninj1, Ergic3, Gstm2, Vps35, Map1lc3a, Eif4a1, Tsacc, Id1, Cul3, Atp6v1d, Ndufa1, Mat2b, Rpl39l, Tmsb10, Arpc5l, Itgb3bp, Eif1ax, Aktip, Dync1i2, Stub1, Hist1h1e, Kank3, Trappc2l, Idh3a, Znhit6, Dhx40, Pptc7, Znrd1as, Josd2, Psmd10, Gm17018, Cops5, Otub1, Tsc22d3, Apoo, Prps1, Tex15, Wdr36, Xlr, Larp7, Gtsf1, Mrps9, Nae1, Eif4e, Meg3, Jun, Gsdme, Stk25, Rdx, Xrcc5, Serpinb6a, Card19, Snrpb2, Tdrd1, Vapa, Morn2, Gnpda2, Ccdc115, Paip1, Mrfap1, Arf5, Lgals7, Snx4, Anxa7, 2410015M20Rik, Ik, Epb41l2, Ube2d3, Ccnb3, Cdk2, Ift88, Asnsd1, G3bp2, Bri3, Cks2, Efcab2, Eif3e, Spata33, Tulp4, Ddah1, Ift74, Cast, Zcwpw1, Tsc22d1, Akr1b3, Mycbp, Eps15, Bcl7a, Ndufa5, Ube2g2, Psmg4, Atp6v0e, Hsf2, Stard7, Osbpl8, Mtfr1l, Arl3, Gid4, Ppid, Copb2, Phf7, Zfp580, Asz1, Nabp1, Ptcd2, Usp7, Fam50a, Stip1, Rpl29, Nasp, Tuba1a, Ilf2, Tpgs2, Selenow, Anp32a, Rnpc3, Raph1, Coprs, Zc2hc1a, Ccnb2, Dcps, Snrpd2, Eif3c, Hpf1, Mis18a, Cyb5r3, Sar1b, Pgam1, Sf3b2, Yeats4. Download a pdf of this figure here.

Category Feature Plot Proliferation

**Figure 26A:** Category Scatter showing gene category Proliferation.  The following genes of this dataset are represented in this figure: Mki67, Pcna. Download a pdf of this figure [here](report_figures/CatScatterProliferation.V20201005.pdf).

Figure 26A: Category Scatter showing gene category Proliferation. The following genes of this dataset are represented in this figure: Mki67, Pcna. Download a pdf of this figure here.

**Figure 26B:** Dotplot showing gene category Dotplot_Proliferation. The following genes were found in this category and the single-cell dataset: Pcna, Mki67. Download a pdf of this figure [here](report_figures/Dotplot_Proliferation.V20201005.pdf).

Figure 26B: Dotplot showing gene category Dotplot_Proliferation. The following genes were found in this category and the single-cell dataset: Pcna, Mki67. Download a pdf of this figure here.

Category Feature Plot Apoptosis

**Figure 27A:** Category Scatter showing gene category Apoptosis.  The following genes of this dataset are represented in this figure: Acin1, Acvr1c, Aifm3, Akt1, Apc, Bbc3, Bcap31, Birc2, Blcap, Bmx, Bnip1, Capn10, Casp2, Casp3, Casp6, Casp7, Casp8, Cdk5rap3, Cecr2, Cflar, Cideb, Cidec, Clspn, Dedd2, Dffa, Dffb, Dicer1, Dnase1l3, Dnase2a, Dnase2b, Dnm1l, Endog, Ern2, Fnta, Foxl2, H1f0, Hmgb2, Htra2, Kpna1, Kpnb1, Madd, Prkcd, Prkcq, Ptk2, Rock1, Satb1, Sharpin, Stk24, Taok1, Tardbp, Top2a, Xkr8. Download a pdf of this figure [here](report_figures/CatScatterApoptosis.V20201005.pdf).

Figure 27A: Category Scatter showing gene category Apoptosis. The following genes of this dataset are represented in this figure: Acin1, Acvr1c, Aifm3, Akt1, Apc, Bbc3, Bcap31, Birc2, Blcap, Bmx, Bnip1, Capn10, Casp2, Casp3, Casp6, Casp7, Casp8, Cdk5rap3, Cecr2, Cflar, Cideb, Cidec, Clspn, Dedd2, Dffa, Dffb, Dicer1, Dnase1l3, Dnase2a, Dnase2b, Dnm1l, Endog, Ern2, Fnta, Foxl2, H1f0, Hmgb2, Htra2, Kpna1, Kpnb1, Madd, Prkcd, Prkcq, Ptk2, Rock1, Satb1, Sharpin, Stk24, Taok1, Tardbp, Top2a, Xkr8. Download a pdf of this figure here.

**Figure 27B:** Dotplot showing gene category Dotplot_Apoptosis. The following genes were found in this category and the single-cell dataset: Acin1, Acvr1c, Aifm3, Akt1, Apc, Bbc3, Bcap31, Birc2, Blcap, Bmx, Bnip1, Capn10, Casp2, Casp3, Casp6, Casp7, Casp8, Cdk5rap3, Cecr2, Cflar, Cideb, Cidec, Clspn, Dedd2, Dffa, Dffb, Dicer1, Dnase1l3, Dnase2a, Dnase2b, Dnm1l, Endog, Ern2, Fnta, Foxl2, H1f0, Hmgb2, Htra2, Kpna1, Kpnb1, Madd, Prkcd, Prkcq, Ptk2, Rock1, Satb1, Sharpin, Stk24, Taok1, Tardbp, Top2a, Xkr8. Download a pdf of this figure [here](report_figures/Dotplot_Apoptosis.V20201005.pdf).

Figure 27B: Dotplot showing gene category Dotplot_Apoptosis. The following genes were found in this category and the single-cell dataset: Acin1, Acvr1c, Aifm3, Akt1, Apc, Bbc3, Bcap31, Birc2, Blcap, Bmx, Bnip1, Capn10, Casp2, Casp3, Casp6, Casp7, Casp8, Cdk5rap3, Cecr2, Cflar, Cideb, Cidec, Clspn, Dedd2, Dffa, Dffb, Dicer1, Dnase1l3, Dnase2a, Dnase2b, Dnm1l, Endog, Ern2, Fnta, Foxl2, H1f0, Hmgb2, Htra2, Kpna1, Kpnb1, Madd, Prkcd, Prkcq, Ptk2, Rock1, Satb1, Sharpin, Stk24, Taok1, Tardbp, Top2a, Xkr8. Download a pdf of this figure here.

Category Feature Plot Top5_TF_per_cluster_Markers

**Figure 28A:** Category Scatter showing gene category Top5_TF_per_cluster_Markers.  The following genes of this dataset are represented in this figure: Aebp1, Ar, Arx, Cebpb, Churc1, Cir1, Cited1, Creb3l1, Drap1, Ebf1, Edf1, Elk3, Emx2, Epas1, Esr2, Ets1, Ets2, Figla, Foxl2, Gata4, Gtf2a2, Hcls1, Hes1, Hmgb2, Id1, Id2, Irx3, Junb, Lhx8, Lhx9, Lmo7, Lsr, Mef2c, Meis2, Mta1, Mxd3, Mycl, Myocd, Myrf, Nfe2l1, Nfib, Nfix, Nr0b1, Nr2f2, Nr2f6, Nr5a2, Pa2g4, Pax8, Pbx1, Peg3, Prrx1, Pura, Smarca1, Sohlh1, Sox6, Sox9, Spi1, Supt4a, Taf11, Taf7l, Tcf19, Tcf21, Tsc22d1, Ybx1, Zbtb20, Zeb2, Zfhx3, Zkscan3. Download a pdf of this figure [here](report_figures/CatScatterTop5_TF_per_cluster_Markers.V20201005.pdf).

Figure 28A: Category Scatter showing gene category Top5_TF_per_cluster_Markers. The following genes of this dataset are represented in this figure: Aebp1, Ar, Arx, Cebpb, Churc1, Cir1, Cited1, Creb3l1, Drap1, Ebf1, Edf1, Elk3, Emx2, Epas1, Esr2, Ets1, Ets2, Figla, Foxl2, Gata4, Gtf2a2, Hcls1, Hes1, Hmgb2, Id1, Id2, Irx3, Junb, Lhx8, Lhx9, Lmo7, Lsr, Mef2c, Meis2, Mta1, Mxd3, Mycl, Myocd, Myrf, Nfe2l1, Nfib, Nfix, Nr0b1, Nr2f2, Nr2f6, Nr5a2, Pa2g4, Pax8, Pbx1, Peg3, Prrx1, Pura, Smarca1, Sohlh1, Sox6, Sox9, Spi1, Supt4a, Taf11, Taf7l, Tcf19, Tcf21, Tsc22d1, Ybx1, Zbtb20, Zeb2, Zfhx3, Zkscan3. Download a pdf of this figure here.

**Figure 28B:** Dotplot showing gene category Dotplot_Top5_TF_per_cluster_Markers. The following genes were found in this category and the single-cell dataset: Tcf21, Nfix, Nr2f2, Id2, Creb3l1, Foxl2, Nr5a2, Smarca1, Emx2, Peg3, Irx3, Hes1, Tsc22d1, Figla, Sohlh1, Taf7l, Lhx8, Id1, Hmgb2, Tcf19, Mxd3, Ybx1, Ar, Arx, Meis2, Supt4a, Edf1, Pura, Zeb2, Lsr, Lhx9, Cebpb, Lmo7, Drap1, Gtf2a2, Pa2g4, Churc1, Mycl, Esr2, Cited1, Elk3, Mef2c, Epas1, Ets2, Ets1, Prrx1, Ebf1, Zfhx3, Hcls1, Spi1, Junb, Gata4, Nfe2l1, Myocd, Pbx1, Pax8, Sox9, Nr0b1, Zbtb20, Nr2f6, Zkscan3, Cir1, Mta1, Taf11, Aebp1, Myrf, Nfib, Sox6. Download a pdf of this figure [here](report_figures/Dotplot_Top5_TF_per_cluster_Markers.V20201005.pdf).

Figure 28B: Dotplot showing gene category Dotplot_Top5_TF_per_cluster_Markers. The following genes were found in this category and the single-cell dataset: Tcf21, Nfix, Nr2f2, Id2, Creb3l1, Foxl2, Nr5a2, Smarca1, Emx2, Peg3, Irx3, Hes1, Tsc22d1, Figla, Sohlh1, Taf7l, Lhx8, Id1, Hmgb2, Tcf19, Mxd3, Ybx1, Ar, Arx, Meis2, Supt4a, Edf1, Pura, Zeb2, Lsr, Lhx9, Cebpb, Lmo7, Drap1, Gtf2a2, Pa2g4, Churc1, Mycl, Esr2, Cited1, Elk3, Mef2c, Epas1, Ets2, Ets1, Prrx1, Ebf1, Zfhx3, Hcls1, Spi1, Junb, Gata4, Nfe2l1, Myocd, Pbx1, Pax8, Sox9, Nr0b1, Zbtb20, Nr2f6, Zkscan3, Cir1, Mta1, Taf11, Aebp1, Myrf, Nfib, Sox6. Download a pdf of this figure here.

Heatmap and Dotplot for the top5 cluster defining genes

plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")

###############################################################################
## Make Heatmap                                                              ##

OsC@meta.data[["hmIdent2"]] <- paste0("C", OsC@meta.data[,Obio@parameterList$singleCellClusterString])

Idents(OsC) <- "hmIdent2"

levels <- paste0(
  "C",
  sort(unique(OsC@meta.data[,Obio@parameterList$singleCellClusterString]))
  )

levels(OsC) <- levels

## Deal with more than 5000 cells ##
if (nrow(OsC@meta.data) > 5000){
    set.seed(127)
    n.cells <- 5000
    OsC_HM <- OsC
    OsC_HM@meta.data[["HM_sel"]] <- 0
    selPos <- sample(x = nrow(OsC_HM@meta.data), size = n.cells, replace = FALSE, prob = NULL)
    OsC_HM@meta.data[selPos, "HM_sel"] <- 1
    
    OsC_HM <- subset(x = OsC_HM, subset = HM_sel == 1)
    
    subsetString <- paste0("For this heatmap 5000 cells were randomly selected from ", nrow(OsC@meta.data)," cells in the experiment. ")
} else {
    OsC_HM <- OsC
    subsetString <- ""
}

## Scale Data ##
allGenes <- rownames(x = OsC_HM@assays$RNA)
OsC_HM <- ScaleData(OsC_HM, verbose = FALSE, features=allGenes)

## For the moment: resetting heatmap list to keep it short ##
Obio@parameterList[["cat2HMplotList"]] <- list()
Obio@parameterList[["cat2HMplotList"]][["Top5_Cluster_Markers"]] <- as.vector(dfTop5$gene)

## Add top transcription factors for each cluster ##
## Get transcription factor genes ##
if (Obio@parameterList$geneIDcolumn != "mgi_symbol" & Obio@parameterList$geneIDcolumn != "hgnc_symbol") {
    queryGS <- "hgnc_symbol" 
} else {
    queryGS <- Obio@parameterList$geneIDcolumn
}


tempVec <- retrieve.gene.category.from.db(
    cat_id = "ag_lab_categories__10",
    password = db.pwd,
    gene.symbol = queryGS,
    user = Obio@parameterList$db.user,
    host = Obio@parameterList$host
)

###############################################################################
## If this is fish, translation is non-human or non-mouse, translation is necessary
if (queryGS != Obio@parameterList$geneIDcolumn){
    dfAnno <- Obio@dfGeneAnnotation
    dfAnno <- unique(dfAnno[,c("hgnc_symbol",Obio@parameterList$geneIDcolumn )])
    dfAnno <- dfAnno[dfAnno$hgnc_symbol != "", ]
    dfAnno <- dfAnno[dfAnno$hgnc_symbol %in% tempVec, ]
    tempVec <- unique(dfAnno[,Obio@parameterList$geneIDcolumn])
    tempVec <- tempVec[tempVec != ""]
}

dfHMG <- dfGeneralMarkers[dfGeneralMarkers$gene %in% tempVec, ]
dfHMGsel <- data.frame(dfHMG %>% group_by(cluster) %>% top_n(5, avg_diff))

Obio@parameterList[["cat2HMplotList"]][["Top5_TF_Cluster_Markers"]] <- as.vector(unique(dfHMGsel$gene))

## Done with translation
  



for (i in 1:length(Obio@parameterList[["cat2HMplotList"]])){
    tag <- paste0("HMM_", names(Obio@parameterList$cat2HMplotList)[i])
    textSize <- 5    
    
    plotList[[tag]] <- DoHeatmap(
                object = OsC_HM,
                features = Obio@parameterList[["cat2HMplotList"]][[i]],
                #group.by = "hmIdent",
                draw.lines =T,
                label = T,
                group.bar = TRUE,
                slot = "scale.data",
                lines.width = 2 #With of separation lines in 'cells'
                #slim.col.label = TRUE,
                #remove.key = removeKey
            # ) + theme(legend.position = "none"
            ) + theme(text = element_text(size=textSize)
            ) + scale_fill_gradientn(colors = c("blue", "white", "red"))
    
    
    ## Save to file ##
            FNbase <- paste0(tag, VersionPdfExt)
            FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
            FNrel <- paste0("report_figures/", FNbase)

            pdf(FN)
                print(plotList[[tag]])
            dev.off()

            ## Create R markdown chunk ##
            figLegend <- paste0(
                "**Figure ",
                figureCount,
                ":** Heatmap showing the most distinct marker genes in each cluster. " , subsetString,
                "Download a pdf of this figure [here](", FNrel,"). "
            )

            figureCount <- figureCount + 1

            NewChnk <- paste0(
                "#### Heatmap_var_genes",
                "\n```{r Heatmap_", tag,
                ", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
                figLegend,"'}\n",
                "\n",
                "\n print(plotList[['",tag,"']])",
                "\n cat(  '\n')",
                "\n\n\n```\n"
            )

        
    
    chnkVec <- c(
        chnkVec,
        NewChnk
)
    
}



rm(OsC_HM)


## Done making Heatmap                                                       ##
###############################################################################

###############################################################################
## Make dotplot 

Idents(OsC) <- "hmIdent2"

levels <- paste0(
    "C",
    sort(unique(OsC@meta.data[,Obio@parameterList$singleCellClusterString]))
 )

levels(OsC) <- levels


dpGenes <- as.vector(unique(dfTop5$gene))

if (length(dpGenes) >= 50){
  dpGenes <- as.vector(unique(dfTop1$gene))  
}


#dpGenes <- rev(dpGenes[!(duplicated(dpGenes))])

tag <- paste0("Dotplot_", "Var_Genes")
textSize <- 5

plotList[[tag]] <- DotPlotSB(
        object = OsC,
        features = dpGenes,
        #cols = cols,
        group.by = NULL,
        split.by = NULL,
        dot.scale = 4,
        col.min = 0,
        col.max = 5
    ) + ggtitle(gsub("_", "", tag)) + coord_fixed() + coord_flip()
    

## Save to file ##
            FNbase <- paste0(tag, VersionPdfExt)
            FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
            FNrel <- paste0("report_figures/", FNbase)

            pdf(FN)
                print(plotList[[tag]])
            dev.off()

            ## Create R markdown chunk ##
            figLegend <- paste0(
                "**Figure ",
                figureCount,
                ":** Dotplot showing showing selected marker genes. ",
                "Download a pdf of this figure [here](", FNrel,"). "
            )

            figureCount <- figureCount + 1

            NewChnk <- paste0(
                "#### Dotplot Markers",
                "\n```{r Dotplot_var_",
                ", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
                figLegend,"'}\n",
                "\n",
                "\n print(plotList[['",tag,"']])",
                "\n cat(  '\n')",
                "\n\n\n```\n"
            )

        
    
    chnkVec <- c(
        chnkVec,
        NewChnk
)    
    
## Done making dotplot                                                       ##
###############################################################################

############################
## Make cat feature plot

## done making cat feature plot
################################
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))

Heatmap_var_genes

**Figure 29:** Heatmap showing the most distinct marker genes in each cluster. For this heatmap 5000 cells were randomly selected from 22804 cells in the experiment. Download a pdf of this figure [here](report_figures/HMM_Top5_Cluster_Markers.V20201005.pdf).

Figure 29: Heatmap showing the most distinct marker genes in each cluster. For this heatmap 5000 cells were randomly selected from 22804 cells in the experiment. Download a pdf of this figure here.

Heatmap_var_genes

**Figure 30:** Heatmap showing the most distinct marker genes in each cluster. For this heatmap 5000 cells were randomly selected from 22804 cells in the experiment. Download a pdf of this figure [here](report_figures/HMM_Top5_TF_Cluster_Markers.V20201005.pdf).

Figure 30: Heatmap showing the most distinct marker genes in each cluster. For this heatmap 5000 cells were randomly selected from 22804 cells in the experiment. Download a pdf of this figure here.

Dotplot Markers

**Figure 31:** Dotplot showing showing selected marker genes. Download a pdf of this figure [here](report_figures/Dotplot_Var_Genes.V20201005.pdf).

Figure 31: Dotplot showing showing selected marker genes. Download a pdf of this figure here.

Cluster-defining Genes Table

library(DT)

###############################################################################
## Calculate percentages of expressed genes                                  ##
DefaultAssay(OsC) <- "RNA"
my_genes <- rownames(x = OsC@assays$RNA)

exp <- FetchData(OsC, my_genes)

ExprMatrix <- round(as.matrix(colMeans(exp  > 0)) *100,1)
colnames(ExprMatrix)[1] <- "count_cut_off"
dfExprMatrix <- data.frame(ExprMatrix)
dfExprMatrix[["gene"]] <- row.names(dfExprMatrix)

Obio@dataTableList[["dfPercCellsExpr"]] <- dfExprMatrix

## Done calculating percentages of expressed gens                            ##
###############################################################################

###############################################################################
## Create one table per cluster                                              ##
## Add expressed in N percent cells ##
dfPercCellsExpr <- Obio@dataTableList$dfPercCellsExpr

dfDat <- Obio@dataTableList$dfGeneralMarkersFilt
dfDat <- dfDat[,c("cluster", "gene", "avg_diff", "pct.1", "pct.2","myAUC", "power")]
dfDat$avg_diff <- round(dfDat$avg_diff,2)
dfDat$cluster <- paste0("Cluster_",dfDat$cluster, "_C")
dfDat$gene <- substr(dfDat$gene,1,50)

dfDat[["uniqueMarker"]] <- as.character(!duplicated(dfDat$gene))
dfDat$uniqueMarker <- substr(dfDat$uniqueMarker, 1,1)

dtList <- list()

tabClusters <- sort(unique(dfDat$cluster))
chnkVec <- as.vector(NULL, mode="character")
    
linkGeneView <- paste0("https://",urlString,"/",Obio@parameterList$project_id,"/gene-view")
linkFeatureView <- paste0("https://",urlString,"/mdata/",Obio@parameterList$project_id,"/html/FeatureView.html")

#for (i in 1:length(tabClusters)){
    #tabLegend = paste0("**Table: ** Positive and negative marker genes for ", tabClusters[i])
    tabLegend = paste0("**Table: ** Positive and negative cluster-defining marker genes. Perc_Cells_Expr: Percentage of total cells expressing gene X. Enr in Cluster: Enrichment of gene X in cluster Y. To collapse the table to one particular cluster, type the name of the cluster in the search box.",
    "Use the [GeneView](",linkGeneView,") or [FeatureView](",linkFeatureView,") functionalities to examine individual genes in more detail. "                   
    )
    #dfTempDat <- dfDat[dfDat$cluster == tabClusters[i],]
    dfTempDat <- dfDat
    
    ## Percent expressed genes 
    dfTempDat <- merge(
        dfTempDat, 
        Obio@dataTableList$dfPercCellsExpr,
        by.x = "gene",
        by.y = "gene"
    )
    
    names(dfTempDat) <- gsub("count_cut_off", "Perc_Cells_Expr",names(dfTempDat))
    names(dfTempDat) <- gsub("myAUC", "AUC", names(dfTempDat))
    names(dfTempDat) <- gsub("[.]", "", names(dfTempDat))
    
    #dtList[[paste0("Table",i)]] <- datatable(dfDat,rownames = FALSE) 
    if (Obio@parameterList$host == "10.27.241.234"){
      urlString <- "biologic.thecrick.org"
    } else {
      urlString <- "biologic.crick.ac.uk"
    }
    
    
    dfTempDat$gene <- paste0("<a href='https://",urlString,"/",Obio@parameterList$project_id,"/gene-view?query=",dfTempDat$gene,"&exact=TRUE' target='_blank'>", dfTempDat$gene, "</a>")
    
    NewChnk <- paste0(
        "#### ", names(dtList),
        "\n```{r datatable_",
        i,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
        tabLegend,"'}\n",
        "\n",
        "\n datatable(dfTempDat,rownames = FALSE,  escape = FALSE)",
        "\n cat(  '\n')",
        "\n\n\n```\n"   
    )
    
    chnkVec <- c(
            chnkVec,
            NewChnk
        )
#}

## Done creating one table per cluster                                      ##
##############################################################################
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))

Table: Positive and negative cluster-defining marker genes. Perc_Cells_Expr: Percentage of total cells expressing gene X. Enr in Cluster: Enrichment of gene X in cluster Y. To collapse the table to one particular cluster, type the name of the cluster in the search box.Use the GeneView or FeatureView functionalities to examine individual genes in more detail.

Define categories enriched in individual clusters

# library(knitr)
# library(ggplot2)
# 
# #save.image("temp.RData")
# 
# DefaultAssay(OsC) <- "RNA"
# 
# dfGeneralMarkersFilt <- data.frame(
#     Obio@dataTableList$dfGeneralMarkersFilt,
#     stringsAsFactors = FALSE
# )
# 
# dfGeneralMarkersFilt$cluster <- paste0("C", dfGeneralMarkersFilt$cluster)
# 
# clusterIDs <- as.character(unique(dfGeneralMarkersFilt$cluster))
# 
# plotList <- list()
# chnkVec <- as.vector(NULL, mode = "character")
# 
# for (j in 1:length(clusterIDs)){
#     dfEnrichSel <- dfGeneralMarkersFilt[dfGeneralMarkersFilt$cluster == as.vector(clusterIDs[j]), ]
#     #dfEnrichSel[["gene"]] <- row.names(dfEnrichSel)
#     
#     posTestGeneSet <- as.vector(
#         unique(
#             dfEnrichSel[dfEnrichSel$avg_diff > lgFCthreshold, "gene"]
#         )
#     )
#     
#     
#     negTestGeneSet <- as.vector(
#         unique(
#             dfEnrichSel[dfEnrichSel$avg_diff < -lgFCthreshold, "gene"]
#         )
#     )
#     
#     ## Get background gene set ##
#     #backgroundGeneVec <- row.names(OsC[["RNA"]]@counts)
#     if ((length(posTestGeneSet) >= 3) |(length(negTestGeneSet) >= 3)){
#         library(enrichR)
#         topMaxCat <- 10
#         dbs <- listEnrichrDbs()
#         
#         dbs <- c("GO_Biological_Process_2017")
#         
#         
#         PosEnriched <- enrichr(posTestGeneSet, dbs)
#         
#         for (i in 1:length(dbs)){
#             dfTemp <- PosEnriched[[dbs[i]]]
#             
#             if (i ==1){
#                 dfPosEnriched <- dfTemp
#             } else {
#                 dfPosEnriched <- rbind(
#                     dfPosEnriched,
#                     dfTemp
#                 )
#             }
#             
#         }
#         
#         dfPosEnriched[["log10FDR"]] <- -1*log10(dfPosEnriched$Adjusted.P.value)
#         dfPosEnriched <- dfPosEnriched[order(-dfPosEnriched$log10FDR),]
#         dfPosEnriched <- na.omit(dfPosEnriched)
#         
#         ## Negative Side ##
#         NegEnriched <- enrichr(negTestGeneSet, dbs)
#         
#         for (i in 1:length(dbs)){
#             dfTemp <- NegEnriched[[dbs[i]]]
#             
#             if (i ==1){
#                 dfNegEnriched <- dfTemp
#             } else {
#                 dfNegEnriched <- rbind(
#                     dfNegEnriched,
#                     dfTemp
#                 )
#             }
#             
#         }
#         
#         
#         dfNegEnriched[["log10FDR"]] <- -1*log10(dfNegEnriched$Adjusted.P.value)
#         dfNegEnriched <- dfNegEnriched[order(-dfPosEnriched$log10FDR),]
#         dfNegEnriched <- na.omit(dfNegEnriched)
#         
#         dfNegSel <- dfNegEnriched
#         if (nrow(dfNegSel) > topMaxCat){
#             dfNegSel <- dfNegSel[1:topMaxCat,]
#         }
#         
#         dfPosSel <- dfPosEnriched
#         if (nrow(dfPosSel) > topMaxCat){
#             dfPosSel <- dfPosSel[1:topMaxCat,]
#         }
#         
#         if ((nrow(dfNegEnriched) > 0) | (nrow(dfPosEnriched) > 0)){
#             
#             
#             dfNegSel$log10FDR <- -1* dfNegSel$log10FDR
#             
#             dfSel <- rbind(
#                 dfNegSel,
#                 dfPosSel
#             )
#             
#             dfSel <- na.omit(dfSel)
#             dfSel <- dfSel[order(dfSel$log10FDR),]
#             dfSel$log10FDR <- round(dfSel$log10FDR, 2)
#             
#             dfSel[["Category"]] <- ""
#             dfSel[dfSel$log10FDR >= 0, "Category"] <- "Enr."
#             dfSel[dfSel$log10FDR < 0, "Category"] <- "Depl."
#             
#             for (k in 1:nrow(dfSel)){
#                 if (nchar(dfSel[k, "Term"]) > 50 & length(grep("\\(GO", as.vector(dfSel[k, "Term"]))) > 0){
#                     part1 <- unlist(strsplit(as.vector(dfSel[k, "Term"]), "\\(GO"))[1]
#                     part1 <- substr(part1, 1, 45)
#                     part2 <- unlist(strsplit(as.vector(dfSel[k, "Term"]), "\\(GO"))[2]
#                     part2 <- paste0("\\(GO", part2)
#                     
#                     if (nchar(part1) > 40 ){
#                         dfSel[k, "Term"] <- paste0(part1, " \\n", part2)
#                     } else { 
#                         dfSel[k, "Term"] <- paste0(part1, " ", part2)
#                     }
#                 }
#             }
#             
#             
#             #dfSel$Term <- gsub("\\(GO", "\\\n\\(GO", dfSel$Term)
#             
#             dfSel$Term <- factor(dfSel$Term, levels = unique(dfSel$Term))
#             
#             plotList[[paste0("ENR_", j)]] <- ggplot(
#                 data=dfSel, aes(x= Term, y=log10FDR, fill=Category, order=log10FDR)
#             ) + geom_bar(stat="identity", colour="black"
#             ) + coord_flip() +scale_fill_manual(values=c("yellow", "blue"))  +  theme(
#                 axis.text.y   = element_text(size=8),
#                 axis.text.x   = element_text(size=8),
#                 axis.title.y  = element_text(size=8),
#                 axis.title.x  = element_text(size=8),
#                 axis.line = element_line(colour = "black"),
#                 panel.border = element_rect(colour = "black", fill=NA, size=1),
#                 plot.title = element_text(hjust = 0.5, size = 12)
#             )  + labs(title = paste0("Cluster ", clusterIDs[j]," enriched genes") ,y = "-log10(FDR)", x = ""
#             ) + geom_hline(yintercept = c(-log10(0.05), log10(0.05)), color = "red", size=0.5, lty=2
#             ) + geom_hline(yintercept = 0, color = "black", size=0.5
#             )
#             cat("  \n")
#             
#             
#             
#             ## Save to file ##
#             FNbase <- paste0("Cluster_", clusterIDs[j],".enriched.genes", VersionPdfExt)
#             FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
#             FNrel <- paste0("report_figures/", FNbase)
#             
#            
#             pdf(FN)
#             print(plotList[[paste0("ENR_", j)]])
#             dev.off()
#             
#             ## Create R markdown chunk ##
#             link <- paste0(
#                 "https://",urlString,"/",
#                 Obio@parameterList$project_id,
#                 "/category-view?category_type=GO-BP"
#             )
#             
#             
#             figLegend <- paste0(
#                 "**Figure ", 
#                 figureCount, 
#                 "**: GO-BP category enrichment analysis for genes that are <font color = \\'yellow\\'>higher</font> and <font color = \\'blue\\'>lower</font> expressed in Cluster ", 
#                 clusterIDs[j],
#                 " as compared to all other clusters. Download a pdf of this figure [here](", FNrel, "). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](",link,") and find the above categories using the search box."
#             )
#             figureCount <- figureCount + 1 
#             
#             NewChnk <- paste0(
#                 "#### ", clusterIDs[j],
#                 "\n```{r enrchr_cluster_",
#                 clusterIDs[j],", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
#                 figLegend,"'}\n",
#                 "\n",
#                 "\n print(plotList[['",paste0("ENR_", j),"']])",
#                 "\n cat(  '\n')",
#                 "\n\n\n```\n"   
#             )
#         }
#     }
#     chnkVec <- c(
#         chnkVec,
#         NewChnk
#     )
# }
###############################################################################
## Do category enrichment on clusters                                        ##
#cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
## Done doing enrichment on clusters                                         ##
###############################################################################

Gene Set Heatmaps

## Two options: full heatmap and averaged heatmaps
## https://satijalab.org/seurat/v3.0/interaction_vignette.html

###############################################################################
## Add percentage expressed genes                                            ##
# DefaultAssay(OsC) <- "RNA"
# my_genes <- rownames(x = OsC@assays$RNA)
# 
# exp <- FetchData(OsC, my_genes)
# 
# ExprMatrix <- round(as.matrix(colMeans(exp  > 0)) *100,1)
# colnames(ExprMatrix)[1] <- "count_cut_off"
# dfExprMatrix <- data.frame(ExprMatrix)
# dfExprMatrix[["gene"]] <- row.names(dfExprMatrix)
# 
# Obio@dataTableList[["dfPercCellsExpr"]] <- dfExprMatrix
# 
# hmRelevantGenes <- as.vector(unique(dfExprMatrix[dfExprMatrix$count_cut_off > Obio@parameterList$singleCellPercExpressedMinCutOff, "gene"]))
# 
# 
# 
# ## Done adding percentage expressed                                          ##
# ###############################################################################
# 
# 
# ###############################################################################
# ## Make plot according to reference categories                               ##
# allGenes <- rownames(x = OsC@assays$RNA)
# OsC <- ScaleData(OsC, verbose = FALSE, features=allGenes)
# 
# DefaultAssay(OsC) <- "RNA"
# 
# 
# ## Add heatmap identities to meta.data ##
# 
OsC@meta.data[["hmIdent"]] <- paste0(
     OsC@meta.data[,Obio@parameterList$singleCellClusterString],
     "_",
     substr(OsC@meta.data$sampleID,1,10)
     
)
# 
if (length(unique(OsC@meta.data$hmIdent)) > 25){
     OsC@meta.data[["hmIdent"]] <- OsC@meta.data[,Obio@parameterList$singleCellClusterString]
     
}
# 
# Idents(OsC) <- "hmIdent"
# 
# ## Done adding heatmap identities to meta.data ##
# 
# printPdf <- TRUE
# referenceList <- Obio@dataTableList$referenceList

# for (i in 1:length(referenceList)){
#     HMname <- names(referenceList)[i]
#     geneVec <- unique(referenceList[[i]][referenceList[[i]] %in% rownames(x = OsC@assays$RNA)])
#     
#     if (length(geneVec) > 50){
#         geneVec <- geneVec[geneVec %in% hmRelevantGenes]
#     }
#     
#     
#     ## Do Heatmap ##
#     if (length(geneVec) < 1500 & length(geneVec) > 2){
#         Idents(OsC) <- "hmIdent"
#         HMname <- names(referenceList)[i]
#         cat("\n")
#         cat(paste0("**Heatmap ", HMname,"**"))
#         cat("\n")
#         cat("\n")
#         ## Cluster genes ##
#         HMgenes <- referenceList[[i]]
#         #dfCluster <- OsC@assays$integrated
#         Mexpr <- GetAssayData(object = OsC, assay.type = "integrated", slot = "scale.data")
#         HMgenesSel <- HMgenes[HMgenes %in% row.names(Mexpr)]
#         
#         if (length(HMgenesSel) > 2){
#             Mexpr <- Mexpr[HMgenesSel,]    
#             
#             pdf(paste0("temp", VersionPdfExt))
#             hmRes <- make.hm(
#                 m.df1 = Mexpr, 
#                 filename = "", 
#                 k.number = 1, 
#                 n.colors = 1000, 
#                 hclust.method = "complete", 
#                 dist.method = "euclidean", 
#                 main = "",
#                 Colv = TRUE,
#                 showRowNames = TRUE,
#                 showColNames = F,
#                 plotSeparationLines = FALSE
#             )
#             dev.off()
#             
#             orderedGenes <- as.vector(unique(row.names(hmRes$sorted)))
#             
#             if (length(unique(OsC@meta.data$hmIdent)) > 10){
#                 removeKey <- TRUE
#             } else {
#                 removeKey <- FALSE
#             }
#             
#             if (length(orderedGenes) <= 50){
#                 label = TRUE
#             } else {
#                 label = FALSE
#             }
#             
#             p1 <- DoHeatmap(
#                 object = OsC, 
#                 features = orderedGenes,
#                 #group.by = "hmIdent",
#                 draw.lines =T,
#                 label = label,
#                 group.bar = TRUE,
#                 slot = "scale.data",
#                 lines.width = 2 #With of separation lines in 'cells'
#                 #slim.col.label = TRUE, 
#                 #remove.key = removeKey
#             ) + theme(legend.position = "none")
#             
#             print(p1)
#             
#             ## Save to file ##
#             FNbase <- paste0("HM", HMname, VersionPdfExt)
#             FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
#             FNrel <- paste0("report_figures/", FNbase)
#             
#             pdf(FN)
#                 print(p1)
#             dev.off()
#             
#             cat("\n")
#             cat(paste0('Download a pdf of this figure [here](', FNrel, '). '))
#             cat("\n")
#             cat("\n")
#             
#         }
#     }
# }


## Done making plots according to gene categories                            ##
###############################################################################

Calculate Average expression

## Ensure that hmIdent exists ##

## Want to average by cluster and subset by sampleID ##



OsC@meta.data[["clustIdent"]] <- paste0(
    "C_", OsC@meta.data[,Obio@parameterList$singleCellClusterString]
)


Idents(OsC) <- "clustIdent"
if (length(grep("sampleID", names(OsC@meta.data))) > 0){
    cluster.averages <- AverageExpression(OsC, return.seurat = TRUE, add.ident = "sampleID")
    
    
} else {
    cluster.averages <- AverageExpression(OsC, return.seurat = TRUE)
}

Idents(OsC) <- "sampleID"


## Retrieved Scaled data ##
dfAvgExpr <- data.frame(cluster.averages[["RNA"]]@data)
dfAvgExpr <- dfAvgExpr[,sort(names(dfAvgExpr))]
dfAvgExpr[["gene"]] <- row.names(dfAvgExpr)

dfAvgScaledData <- data.frame(cluster.averages[["RNA"]]@scale.data)
dfAvgScaledData <- dfAvgScaledData[,sort(names(dfAvgScaledData))]
dfAvgScaledData[["gene"]] <- row.names(dfAvgScaledData)


Obio@dataTableList[["dfAvglg10ExprByClusterBySample"]] <- dfAvgExpr
Obio@dataTableList[["dfAvglg10ExprByClusterBySampleScaled"]] <- dfAvgScaledData
###############################################################################

###############################################################################
## Average by Cluster                                                        ##
Idents(OsC) <- "clustIdent"

Idents(OsC) <- factor(Idents(OsC), levels = sort(levels(OsC)))

cluster.averages <- AverageExpression(OsC, return.seurat = TRUE)

dfAvgExpr <- data.frame(cluster.averages[["RNA"]]@data)
dfAvgExpr <- dfAvgExpr[,sort(names(dfAvgExpr))]
dfAvgExpr[["gene"]] <- row.names(dfAvgExpr)

dfAvgScaledData <- data.frame(cluster.averages[["RNA"]]@scale.data)
dfAvgScaledData <- dfAvgScaledData[,sort(names(dfAvgScaledData))]
dfAvgScaledData[["gene"]] <- row.names(dfAvgScaledData)


Obio@dataTableList[["dfAvglg10ExprPerCluster"]] <- dfAvgExpr
Obio@dataTableList[["dfAvglg10ExprPerClusterScaled"]] <- dfAvgScaledData

## Done Average by Cluster                                                   ##
###############################################################################

###############################################################################
## Average gene expression by sample                                         ##


Idents(OsC) <- "sampleID"
cluster.averages <- AverageExpression(OsC, return.seurat = TRUE)

## Retrieved Scaled data ##
dfAvgExpr <- data.frame(cluster.averages[["RNA"]]@data)
selVec <- names(dfAvgExpr)

dfAvgExpr[["gene"]] <- row.names(dfAvgExpr)
selVec <- c("gene", selVec)

dfAvgExpr <- dfAvgExpr[,selVec]


dfAvgScaledData <- data.frame(cluster.averages[["RNA"]]@scale.data)
selVec <- names(dfAvgScaledData)

dfAvgScaledData[["gene"]] <- row.names(dfAvgScaledData)
selVec <- c("gene", selVec)

dfAvgScaledData <- dfAvgScaledData[,selVec]


Obio@dataTableList[["dfAvglg10ExprBySample"]] <- dfAvgExpr
Obio@dataTableList[["dfAvglg10ExprBySampleScaled"]] <- dfAvgScaledData


## Done average gene expression by sample                                    ##
###############################################################################

## Diagnostic plot ##
#DoHeatmap(cluster.averages, features = c("LY75", "PSMB11", "CCL25", "CD274"), size = 3, draw.lines = FALSE, slot = "scale.data")



## Create Plots ##
# Create temp files for plotting #


# for (i in 1:length(clusterIDs)){
#     
#     
#     ## Select Marker genes ##
#     dfTemp <- Obio@dataTableList$dfGeneralMarkersFilt
#     dfTemp <- dfTemp[dfTemp$cluster == clusterIDs[i], ]
#     posGenes <- as.vector(unique(dfTemp[dfTemp$direction == "positive", "gene"]))
#     negGenes <- as.vector(unique(dfTemp[dfTemp$direction == "negative", "gene"]))
#     
#     avgTemp <- dfRes[,c("gene", names(dfRes)[grep(paste0("Cl_", clusterIDs[i]), names(dfRes))])]
#     avgTemp$Selection <- ""
#     avgTemp[avgTemp$gene %in% posGenes, "Selection"] <- "+"
#     avgTemp[avgTemp$gene %in% negGenes, "Selection"] <- "-"
#     names(avgTemp) <- c("gene", "Ctrl", "Prad", "Selection")
#     
#     df_layer_1 <- avgTemp[avgTemp$Selection =="",]
#     df_layer_2 <- avgTemp[avgTemp$Selection =="+",]
#     df_layer_3 <- avgTemp[avgTemp$Selection =="-",]
#     
#     
#     p1 <- ggplot(
#     ) + geom_point(data = df_layer_1, aes(Ctrl, Prad), fill = "black", shape = 21
#     ) + geom_point(data = df_layer_2, aes(Ctrl, Prad), fill = "red", shape = 21
#     ) + geom_point(data = df_layer_3, aes(Ctrl, Prad), fill = "blue", shape = 21
#     ) + ggtitle(paste0("Cluster ", clusterIDs[i])
#     ) +  theme(
#             axis.text.y   = element_text(size=8),
#             axis.text.x   = element_text(size=8),
#             axis.title.y  = element_text(size=8),
#             axis.title.x  = element_text(size=8),
#             axis.line = element_line(colour = "black"),
#             panel.border = element_rect(colour = "black", fill=NA, size=1),
#             plot.title = element_text(hjust = 0.5, size = 12)
#         )
#     
# 
#     #p1 <- LabelPoints(plot = p1, points = genes.to.label, repel = TRUE)
#     cat("\n");cat(paste0("#### Scatterplot Cluster ", clusterIDs[i]));cat("\n")
#     print(p1)
#     
#     ## Save to file ##
#     FNbase <- paste0("scatterplot.",clusterIDs[i],".highlighted.pdf")
#     FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
#     FNrel <- paste0("report_figures/", FNbase)
#     
#     pdf(FN)
#         print(p1)
#     dev.off()
#     
#     
#     cat("Download a pdf of this figure [here](",FNrel,"). ")
#     
# }

Example Feature Plots

DefaultAssay(OsC) <- "RNA"

cat(paste0(
    "Feature plots for any gene in this experiment can be viewed [**here**](http://shiny.thecrick.org/babs/boeings/",Obio@parameterList$project_id,"_app/). This link will only work when you are on the Crick network or VPN-connected to the Crick. "
))

Feature plots for any gene in this experiment can be viewed here. This link will only work when you are on the Crick network or VPN-connected to the Crick.

plotGenes <- c(
    Obio@dataTableList$referenceList$integrated_top30var[1:10]
)

plotParts <- ceiling(length(plotGenes)/2)

chnkVec <- as.vector(NULL, mode = "character")
plotListF <- list()

for (i in 1:plotParts){
    tag1 <- paste0("Featureplot_",i)
    
    featureGenes <- c(plotGenes[((2*i)-1)], plotGenes[((2*i))])
    
    plotListF[[tag1]] <- FeaturePlot(
        OsC,
        features = featureGenes,
        #split.by = "orig.ident",
        reduction = Obio@parameterList$primReduction
    )
    
    
    ## Save to file ##
    FNbase <- paste0("Featureplot.", plotGenes[((2*i)-1)], ".", plotGenes[((2*i))],".", VersionPdfExt)
        FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        print(plotListF[[tag1]])
    dev.off()
    
    linkFeatureView <- paste0("https://",urlString,"/mdata/",Obio@parameterList$project_id,"/html/FeatureView.html")

    figLegend <-  paste0(
        "**Figure ", 
        figureCount, 
        ":** Gene expression plot for genes ", 
        plotGenes[((2*i)-1)], 
        " and ", 
        plotGenes[((2*i))],".",
        " Results for any other gene may be plotted in [FeatureView](",linkFeatureView,")."
    )
      
    NewChnk <- paste0(
        "#### Featureplot ", plotGenes[((2*i)-1)], " and ",plotGenes[((2*i))],
        "\n```{r FeaturePlot_", i,
        ", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
        figLegend,"'}\n",
        "\n",
        "\n print(plotListF[['",tag1,"']])",
        "\n cat(  '\n')",
        "\n\n\n```\n"   
    )
    
    chnkVec <- c(
        chnkVec,
        NewChnk
    )

}
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))

Featureplot Hbb-bs and Hba-a1

**Figure 32:** Gene expression plot for genes Hbb-bs and Hba-a1. Results for any other gene may be plotted in [FeatureView](https://biologic.crick.ac.uk/mdata/rll358A/html/FeatureView.html).

Figure 32: Gene expression plot for genes Hbb-bs and Hba-a1. Results for any other gene may be plotted in FeatureView.

Featureplot Hbb-bt and Hba-a2

**Figure 32:** Gene expression plot for genes Hbb-bt and Hba-a2. Results for any other gene may be plotted in [FeatureView](https://biologic.crick.ac.uk/mdata/rll358A/html/FeatureView.html).

Figure 32: Gene expression plot for genes Hbb-bt and Hba-a2. Results for any other gene may be plotted in FeatureView.

Featureplot Dppa3 and Ooep

**Figure 32:** Gene expression plot for genes Dppa3 and Ooep. Results for any other gene may be plotted in [FeatureView](https://biologic.crick.ac.uk/mdata/rll358A/html/FeatureView.html).

Figure 32: Gene expression plot for genes Dppa3 and Ooep. Results for any other gene may be plotted in FeatureView.

Featureplot Xdh and Gtsf1

**Figure 32:** Gene expression plot for genes Xdh and Gtsf1. Results for any other gene may be plotted in [FeatureView](https://biologic.crick.ac.uk/mdata/rll358A/html/FeatureView.html).

Figure 32: Gene expression plot for genes Xdh and Gtsf1. Results for any other gene may be plotted in FeatureView.

Featureplot Padi6 and Uchl1

**Figure 32:** Gene expression plot for genes Padi6 and Uchl1. Results for any other gene may be plotted in [FeatureView](https://biologic.crick.ac.uk/mdata/rll358A/html/FeatureView.html).

Figure 32: Gene expression plot for genes Padi6 and Uchl1. Results for any other gene may be plotted in FeatureView.

PCA Gene and Cell Plots

figureCount <- figureCount + 1
# An alternative heuristic method generates an 'Elbow plot': a ranking of principle components based on the percentage of variance explained by each one (`ElbowPlot` function). In this example, we can observe an 'elbow' around PC9-10, suggesting that the majority of true signal is captured in the first 10 PCs.

ElbowPlot(object = OsC) +  theme(
    axis.text.y   = element_text(size=8),
    axis.text.x   = element_text(size=8),
    axis.title.y  = element_text(size=8),
    axis.title.x  = element_text(size=8),
    axis.line = element_line(colour = "black"),
    panel.border = element_rect(colour = "black", fill=NA, size=1),
    plot.title = element_text(hjust = 0.5, size = 12)
) + ggtitle(paste0("Variance per PCA Dimension"))
**Figure, 33: Elbowplot** Variance explained per PCA dimension

Figure, 33: Elbowplot Variance explained per PCA dimension

## Plot variance per PCA dimension ##
## Add PCA coordinates ##
dfTemp <- data.frame(OsC@reductions$pca@cell.embeddings)[, 1:20]
OsC <- addDf2seuratMetaData(
    obj = OsC, 
    dfAdd = dfTemp
)


## Add UMAP coordinates to Metadata ##
dfAdd <- data.frame(OsC@reductions$umap@cell.embeddings)

OsC <- addDf2seuratMetaData(
    obj = OsC, 
    dfAdd = dfAdd
)

## Add tSNE coordinates to Metadata ##
dfAdd <- data.frame(OsC@reductions$tsne@cell.embeddings)

OsC <- addDf2seuratMetaData(
    obj = OsC, 
    dfAdd = dfAdd
)


Obio@dataTableList[["meta.data"]] <- OsC@meta.data

xVec <- c("PC_1","PC_3","PC_5","PC_7","PC_9","PC_11","PC_13","PC_15","PC_17","PC_19")
yVec <- c("PC_2","PC_4","PC_6","PC_8","PC_10","PC_12","PC_14","PC_16","PC_18","PC_20")
pcVec <- c("PC_1","PC_2","PC_3","PC_4","PC_5","PC_6","PC_7","PC_8","PC_9","PC_10")
chnkVec <- as.vector(NULL, mode = "character")
plotListCell <- list()
plotListGene <- list()

###############################################################################
## Collect top-enriched genes                                                ##
EnrichedGenesList <- list()
## Done                                                                      ##
###############################################################################

for (i in 1:length(xVec)){
    dfDat <- Obio@dataTableList$meta.data
    dfSel <- dfDat
    selXY <- c(xVec[i], yVec[i])
    colCol <- Obio@parameterList$singleCellClusterString
    
    tag <- paste0(xVec[i], "and", yVec[i])
    tag <- gsub("_", "", tag)
    
    ## Make Cell level PCA
    plotListCell[[tag]] <- ggplot(data=dfDat, aes_string(selXY[1] , selXY[2], col=colCol, shape="sampleID")
    ) + geom_vline(xintercept = 0, color = "grey", size=0.5
    ) + geom_hline(yintercept = 0, color = "grey", size=0.5
    ) + geom_point()+ ggtitle(paste0("PCA - Cell Level")
    ) +  theme(
        axis.text.y   = element_text(size=8),
        axis.text.x   = element_text(size=8),
        axis.title.y  = element_text(size=8),
        axis.title.x  = element_text(size=8),
        axis.line = element_line(colour = "black"),
        panel.border = element_rect(colour = "black", fill=NA, size=1),
        plot.title = element_text(hjust = 0.5, size = 12)
    )    
    
    ## Save to file ##
    FNbase <- paste0("PCA.cell.level.", xVec[i],".", yVec[i], ".", VersionPdfExt)
        FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        print(plotListCell[[tag]])
    dev.off()
    
    link <- paste0("https://",urlString,"/",Obio@parameterList$project_id,"/pca?x_axis=",gsub("_", "", xVec[i]),"&y_axis=",gsub("_", "", yVec[i]))
    
    figCap <- paste0(
        "**Figure, " ,figureCount,"A:** Cell-level PCA plot for dimensions ", xVec[i], " and ", yVec[i],". ", 
        "Download a pdf of this figure [here](", FNrel,"). ",
        "An interactive version of this figure can be found [here](", link, "). "
    )
    
    
    NewChnk <- paste0(
        "#### PCA Cell Level ", xVec[i], " and ",yVec[i],
        "\n```{r PCAcells_", i,
        ", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
        figCap,"'}\n",
        "\n",
        "\n print(plotListCell[['",tag,"']])",
        "\n cat(  '\n')",
        "\n\n\n```\n"   
    )
    
    chnkVec <- c(
        chnkVec,
        NewChnk
    )
    
    ## Done with cells                                                       ##
    ###########################################################################
    
    ###########################################################################
    ## Start with genes                                                      ##
    dfPCADat <- data.frame(Loadings(OsC, reduction = "pca"))
        dfPCADat[["gene"]] <- row.names(dfPCADat)
    
    dfPCADat <- gather(
        dfPCADat, 
        condition, 
        measurement, 1:(ncol(dfPCADat)-1), 
        factor_key=TRUE
    )

    Obio@dataTableList[["dfPCAloadings"]] <- dfPCADat

    ## Make Gene Level PCA ##
    dfPCADat <- data.frame(Loadings(OsC, reduction = "pca"))
    dfPCADat[["gene"]] <- row.names(dfPCADat)
    dfPCADat <- gather(
        dfPCADat,
        condition,
        measurement, 1:(ncol(dfPCADat)-1),
        factor_key=TRUE
    )
    
    dfLoad <- dfPCADat
    Obio@dataTableList$dfPCAloadings <- dfLoad
    ## Plot ##
    selXY <- c(xVec[i], yVec[i])
    dfSel <- filter(dfLoad, condition %in% selXY)
    dfSel <- dfSel %>% spread(key=condition, value=measurement)
    row.names(dfSel) <- dfSel$gene
    dfSel[["highlight"]] <- ""
    dfSel <- dfSel[order(dfSel[,selXY[1]], decreasing = FALSE), ]
    dfSel[1:15, "highlight"] <- "+"
    EnrichedGenesList[[paste0(selXY[1], "_neg")]]<- as.vector(dfSel$gene[1:15])
    
    dfSel <- dfSel[order(dfSel[,selXY[1]], decreasing = TRUE), ]
    dfSel[1:15, "highlight"] <- "+"
    EnrichedGenesList[[paste0(selXY[1], "_pos")]]<- as.vector(dfSel$gene[1:15])
    
    dfSel <- dfSel[order(dfSel[,selXY[2]], decreasing = FALSE), ]
    dfSel[1:15, "highlight"] <- "+"
    EnrichedGenesList[[paste0(selXY[2], "_neg")]]<- as.vector(dfSel$gene[1:15])
    
    dfSel <- dfSel[order(dfSel[,selXY[2]], decreasing = TRUE), ]
    dfSel[1:15, "highlight"] <- "+"
    EnrichedGenesList[[paste0(selXY[2], "_pos")]]<- as.vector(dfSel$gene[1:15])
    
    plotListGene[[tag]] <- ggplot(data=dfSel, aes_string(x=selXY[1],y=selXY[2], col="highlight")
    ) + geom_vline(xintercept = 0, color = "grey", size=0.5
    ) + geom_hline(yintercept = 0, color = "grey", size=0.5) + geom_point() + scale_color_manual(values=c("black", "red")) + ggtitle(paste0("PCA - Cell Level")
    ) +  theme(
        axis.text.y   = element_text(size=8),
        axis.text.x   = element_text(size=8),
        axis.title.y  = element_text(size=8),
        axis.title.x  = element_text(size=8),
        axis.line = element_line(colour = "black"),
        panel.border = element_rect(colour = "black", fill=NA, size=1),
        plot.title = element_text(hjust = 0.5, size = 12)
    )  
    
    points <-  as.vector(unique(dfSel[dfSel$highlight=="+", "gene"]))
    plotListGene[[tag]] <- LabelPoints(plot = plotListGene[[tag]], points =points, repel = TRUE, xnudge = 0, ynudge = 0)
    
    ## Save to file ##
    FNbase <- paste0("PCA.cell.level.", xVec[i],".", yVec[i],".", VersionPdfExt)
        FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        print(plotListGene[[tag]])
    dev.off()
    
    dim1 <- gsub("PC_", "", xVec[i])
    dim2 <- gsub("PC_", "", yVec[i])
    link <- paste0(
        " https://",urlString,"/",
        Obio@parameterList$project_id, 
        "/scatterplot?x_axis=add_counts_PCA_Dim_",
        dim1, 
        "_Loadings&y_axis=add_counts_PCA_Dim_",
        dim2,
        "_Loadings&highlight_gene=&cat_id=ag_lab_categories__10"
    )
    
    figCap <- paste0(
        "**Figure, " ,figureCount,"B:**Gene-level PCA plot for dimensions ", xVec[i], " and ", yVec[i], ". An interactive version of this figure can be found [here](", link, "). "
    )
   
    
    NewChnk <- paste0(
        "\n```{r PCA_gene_level_", i,
        ", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
        figCap,"'}\n",
        "\n",
        "\n print(plotListGene[['",tag,"']])",
        "\n cat(  '\n')",
        "\n\n\n```\n"   
    )
    
    chnkVec <- c(
        chnkVec,
        NewChnk
    )
    
    ## Done with genes                                                       ##
    ###########################################################################
    figureCount <- figureCount + 1
}
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))

PCA Cell Level PC_1 and PC_2

**Figure, 33A:** Cell-level PCA plot for dimensions PC_1 and PC_2. Download a pdf of this figure [here](report_figures/PCA.cell.level.PC_1.PC_2..V20201005.pdf). An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/pca?x_axis=PC1&y_axis=PC2).

Figure, 33A: Cell-level PCA plot for dimensions PC_1 and PC_2. Download a pdf of this figure here. An interactive version of this figure can be found here.

**Figure, 33B:**Gene-level PCA plot for dimensions PC_1 and PC_2. An interactive version of this figure can be found [here]( https://biologic.crick.ac.uk/rll358A/scatterplot?x_axis=add_counts_PCA_Dim_1_Loadings&y_axis=add_counts_PCA_Dim_2_Loadings&highlight_gene=&cat_id=ag_lab_categories__10).

Figure, 33B:Gene-level PCA plot for dimensions PC_1 and PC_2. An interactive version of this figure can be found here.

PCA Cell Level PC_3 and PC_4

**Figure, 34A:** Cell-level PCA plot for dimensions PC_3 and PC_4. Download a pdf of this figure [here](report_figures/PCA.cell.level.PC_3.PC_4..V20201005.pdf). An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/pca?x_axis=PC3&y_axis=PC4).

Figure, 34A: Cell-level PCA plot for dimensions PC_3 and PC_4. Download a pdf of this figure here. An interactive version of this figure can be found here.

**Figure, 34B:**Gene-level PCA plot for dimensions PC_3 and PC_4. An interactive version of this figure can be found [here]( https://biologic.crick.ac.uk/rll358A/scatterplot?x_axis=add_counts_PCA_Dim_3_Loadings&y_axis=add_counts_PCA_Dim_4_Loadings&highlight_gene=&cat_id=ag_lab_categories__10).

Figure, 34B:Gene-level PCA plot for dimensions PC_3 and PC_4. An interactive version of this figure can be found here.

PCA Cell Level PC_5 and PC_6

**Figure, 35A:** Cell-level PCA plot for dimensions PC_5 and PC_6. Download a pdf of this figure [here](report_figures/PCA.cell.level.PC_5.PC_6..V20201005.pdf). An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/pca?x_axis=PC5&y_axis=PC6).

Figure, 35A: Cell-level PCA plot for dimensions PC_5 and PC_6. Download a pdf of this figure here. An interactive version of this figure can be found here.

**Figure, 35B:**Gene-level PCA plot for dimensions PC_5 and PC_6. An interactive version of this figure can be found [here]( https://biologic.crick.ac.uk/rll358A/scatterplot?x_axis=add_counts_PCA_Dim_5_Loadings&y_axis=add_counts_PCA_Dim_6_Loadings&highlight_gene=&cat_id=ag_lab_categories__10).

Figure, 35B:Gene-level PCA plot for dimensions PC_5 and PC_6. An interactive version of this figure can be found here.

PCA Cell Level PC_7 and PC_8

**Figure, 36A:** Cell-level PCA plot for dimensions PC_7 and PC_8. Download a pdf of this figure [here](report_figures/PCA.cell.level.PC_7.PC_8..V20201005.pdf). An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/pca?x_axis=PC7&y_axis=PC8).

Figure, 36A: Cell-level PCA plot for dimensions PC_7 and PC_8. Download a pdf of this figure here. An interactive version of this figure can be found here.

**Figure, 36B:**Gene-level PCA plot for dimensions PC_7 and PC_8. An interactive version of this figure can be found [here]( https://biologic.crick.ac.uk/rll358A/scatterplot?x_axis=add_counts_PCA_Dim_7_Loadings&y_axis=add_counts_PCA_Dim_8_Loadings&highlight_gene=&cat_id=ag_lab_categories__10).

Figure, 36B:Gene-level PCA plot for dimensions PC_7 and PC_8. An interactive version of this figure can be found here.

PCA Cell Level PC_9 and PC_10

**Figure, 37A:** Cell-level PCA plot for dimensions PC_9 and PC_10. Download a pdf of this figure [here](report_figures/PCA.cell.level.PC_9.PC_10..V20201005.pdf). An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/pca?x_axis=PC9&y_axis=PC10).

Figure, 37A: Cell-level PCA plot for dimensions PC_9 and PC_10. Download a pdf of this figure here. An interactive version of this figure can be found here.

**Figure, 37B:**Gene-level PCA plot for dimensions PC_9 and PC_10. An interactive version of this figure can be found [here]( https://biologic.crick.ac.uk/rll358A/scatterplot?x_axis=add_counts_PCA_Dim_9_Loadings&y_axis=add_counts_PCA_Dim_10_Loadings&highlight_gene=&cat_id=ag_lab_categories__10).

Figure, 37B:Gene-level PCA plot for dimensions PC_9 and PC_10. An interactive version of this figure can be found here.

PCA Cell Level PC_11 and PC_12

**Figure, 38A:** Cell-level PCA plot for dimensions PC_11 and PC_12. Download a pdf of this figure [here](report_figures/PCA.cell.level.PC_11.PC_12..V20201005.pdf). An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/pca?x_axis=PC11&y_axis=PC12).

Figure, 38A: Cell-level PCA plot for dimensions PC_11 and PC_12. Download a pdf of this figure here. An interactive version of this figure can be found here.

**Figure, 38B:**Gene-level PCA plot for dimensions PC_11 and PC_12. An interactive version of this figure can be found [here]( https://biologic.crick.ac.uk/rll358A/scatterplot?x_axis=add_counts_PCA_Dim_11_Loadings&y_axis=add_counts_PCA_Dim_12_Loadings&highlight_gene=&cat_id=ag_lab_categories__10).

Figure, 38B:Gene-level PCA plot for dimensions PC_11 and PC_12. An interactive version of this figure can be found here.

PCA Cell Level PC_13 and PC_14

**Figure, 39A:** Cell-level PCA plot for dimensions PC_13 and PC_14. Download a pdf of this figure [here](report_figures/PCA.cell.level.PC_13.PC_14..V20201005.pdf). An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/pca?x_axis=PC13&y_axis=PC14).

Figure, 39A: Cell-level PCA plot for dimensions PC_13 and PC_14. Download a pdf of this figure here. An interactive version of this figure can be found here.

**Figure, 39B:**Gene-level PCA plot for dimensions PC_13 and PC_14. An interactive version of this figure can be found [here]( https://biologic.crick.ac.uk/rll358A/scatterplot?x_axis=add_counts_PCA_Dim_13_Loadings&y_axis=add_counts_PCA_Dim_14_Loadings&highlight_gene=&cat_id=ag_lab_categories__10).

Figure, 39B:Gene-level PCA plot for dimensions PC_13 and PC_14. An interactive version of this figure can be found here.

PCA Cell Level PC_15 and PC_16

**Figure, 40A:** Cell-level PCA plot for dimensions PC_15 and PC_16. Download a pdf of this figure [here](report_figures/PCA.cell.level.PC_15.PC_16..V20201005.pdf). An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/pca?x_axis=PC15&y_axis=PC16).

Figure, 40A: Cell-level PCA plot for dimensions PC_15 and PC_16. Download a pdf of this figure here. An interactive version of this figure can be found here.

**Figure, 40B:**Gene-level PCA plot for dimensions PC_15 and PC_16. An interactive version of this figure can be found [here]( https://biologic.crick.ac.uk/rll358A/scatterplot?x_axis=add_counts_PCA_Dim_15_Loadings&y_axis=add_counts_PCA_Dim_16_Loadings&highlight_gene=&cat_id=ag_lab_categories__10).

Figure, 40B:Gene-level PCA plot for dimensions PC_15 and PC_16. An interactive version of this figure can be found here.

PCA Cell Level PC_17 and PC_18

**Figure, 41A:** Cell-level PCA plot for dimensions PC_17 and PC_18. Download a pdf of this figure [here](report_figures/PCA.cell.level.PC_17.PC_18..V20201005.pdf). An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/pca?x_axis=PC17&y_axis=PC18).

Figure, 41A: Cell-level PCA plot for dimensions PC_17 and PC_18. Download a pdf of this figure here. An interactive version of this figure can be found here.

**Figure, 41B:**Gene-level PCA plot for dimensions PC_17 and PC_18. An interactive version of this figure can be found [here]( https://biologic.crick.ac.uk/rll358A/scatterplot?x_axis=add_counts_PCA_Dim_17_Loadings&y_axis=add_counts_PCA_Dim_18_Loadings&highlight_gene=&cat_id=ag_lab_categories__10).

Figure, 41B:Gene-level PCA plot for dimensions PC_17 and PC_18. An interactive version of this figure can be found here.

PCA Cell Level PC_19 and PC_20

**Figure, 42A:** Cell-level PCA plot for dimensions PC_19 and PC_20. Download a pdf of this figure [here](report_figures/PCA.cell.level.PC_19.PC_20..V20201005.pdf). An interactive version of this figure can be found [here](https://biologic.crick.ac.uk/rll358A/pca?x_axis=PC19&y_axis=PC20).

Figure, 42A: Cell-level PCA plot for dimensions PC_19 and PC_20. Download a pdf of this figure here. An interactive version of this figure can be found here.

**Figure, 42B:**Gene-level PCA plot for dimensions PC_19 and PC_20. An interactive version of this figure can be found [here]( https://biologic.crick.ac.uk/rll358A/scatterplot?x_axis=add_counts_PCA_Dim_19_Loadings&y_axis=add_counts_PCA_Dim_20_Loadings&highlight_gene=&cat_id=ag_lab_categories__10).

Figure, 42B:Gene-level PCA plot for dimensions PC_19 and PC_20. An interactive version of this figure can be found here.

Characterize PCA Dimensions

selVec <- c(Obio@parameterList$singleCellClusterString, names(OsC@meta.data)[grep("^PC", names(OsC@meta.data))])

dfPCdat <- OsC@meta.data[, selVec]

ymin <- 1.1*min(dfPCdat$PC_1)
ymax <- 1.1*max(dfPCdat$PC_1)

clusterVec <- sort(unique(OsC@meta.data[,Obio@parameterList$singleCellClusterString]))

library(scales)
clusterColVec <- hue_pal()(length(clusterVec))

i=1

chnkVec <- as.vector(NULL, mode="character")
plotList <- list()

for (i in 1:length(clusterVec)){
    dfTemp <- dfPCdat[dfPCdat[,Obio@parameterList$singleCellClusterString] == clusterVec[i],]
    dfTemp[,Obio@parameterList$singleCellClusterString] <- NULL
  
  
  
    library(tidyr)
    dfTemp <- gather(dfTemp, PC)
  
    orderVec <- sort(as.numeric(gsub("PC_", "",unique(dfTemp$PC))))
    orderVec <- paste0("PC_", orderVec)
  
    dfTemp$PC <- factor(dfTemp$PC, levels = orderVec)
    Ncolumns <- length(unique(dfTemp$PC))
  
    a <- paste0("Cluster_", clusterVec[i])

    tag <- paste0("PCA_Distributions_", a)

    plotList[[tag]] <-ggplot(
        dfTemp, 
        aes(x=PC, y=value, fill = PC)
        ) + geom_hline(yintercept = 0, color = "black", size=0.5
        )  + geom_jitter(width=0.1,alpha=0.2
        ) + geom_boxplot(
        ) +    theme(
            legend.position = "none",
            axis.text.y   = element_text(size=8),
            axis.text.x   = element_text(size=8, angle = 45,vjust = 1, hjust=1),
            axis.title.y  = element_text(size=8),
            axis.title.x  = element_text(size=8),
            axis.line = element_line(colour = "black"),
            panel.border = element_rect(colour = "black", fill=NA, size=1),
            plot.title = element_text(hjust = 0.5, size = 12)
        ) + ggtitle(paste0("PCA Distribution: ", a)
        ) + ylim(ymin, ymax) + scale_fill_manual(values=rep(clusterColVec[i], Ncolumns))
    
    ## Save to file ##
    FNbase <- paste0(tag, VersionPdfExt)
        FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
    FNrel <- paste0("report_figures/", FNbase)
    
    pdf(FN)
        print(plotList[[tag]])
    dev.off()
    
    
    figCap <- paste0(
        "**Figure, " ,
        figureCount,
        ":** This plot may help you to identify PCA dimensions, in which marker genes for cluster ",
        clusterVec[i],
        " become evident. Download a pdf of this figure [here](", FNrel,"). "
    )
    
    figureCount <- figureCount + 1
    
    NewChnk <- paste0(
        "#### ", tag,
        "\n```{r ", tag,
        ", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
        figCap,"'}\n",
        "\n",
        "\n print(plotList[['",tag,"']])",
        "\n cat(  '\n')",
        "\n\n\n```\n"   
    )
    
    chnkVec <- c(
        chnkVec,
        NewChnk
    )
}
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))

PCA_Distributions_Cluster_0

**Figure, 43:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 0 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_0.V20201005.pdf).

Figure, 43: This plot may help you to identify PCA dimensions, in which marker genes for cluster 0 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_1

**Figure, 44:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 1 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_1.V20201005.pdf).

Figure, 44: This plot may help you to identify PCA dimensions, in which marker genes for cluster 1 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_2

**Figure, 45:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 2 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_2.V20201005.pdf).

Figure, 45: This plot may help you to identify PCA dimensions, in which marker genes for cluster 2 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_3

**Figure, 46:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 3 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_3.V20201005.pdf).

Figure, 46: This plot may help you to identify PCA dimensions, in which marker genes for cluster 3 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_4

**Figure, 47:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 4 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_4.V20201005.pdf).

Figure, 47: This plot may help you to identify PCA dimensions, in which marker genes for cluster 4 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_5

**Figure, 48:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 5 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_5.V20201005.pdf).

Figure, 48: This plot may help you to identify PCA dimensions, in which marker genes for cluster 5 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_6

**Figure, 49:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 6 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_6.V20201005.pdf).

Figure, 49: This plot may help you to identify PCA dimensions, in which marker genes for cluster 6 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_7

**Figure, 50:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 7 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_7.V20201005.pdf).

Figure, 50: This plot may help you to identify PCA dimensions, in which marker genes for cluster 7 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_8

**Figure, 51:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 8 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_8.V20201005.pdf).

Figure, 51: This plot may help you to identify PCA dimensions, in which marker genes for cluster 8 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_9

**Figure, 52:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 9 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_9.V20201005.pdf).

Figure, 52: This plot may help you to identify PCA dimensions, in which marker genes for cluster 9 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_10

**Figure, 53:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 10 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_10.V20201005.pdf).

Figure, 53: This plot may help you to identify PCA dimensions, in which marker genes for cluster 10 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_11

**Figure, 54:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 11 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_11.V20201005.pdf).

Figure, 54: This plot may help you to identify PCA dimensions, in which marker genes for cluster 11 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_12

**Figure, 55:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 12 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_12.V20201005.pdf).

Figure, 55: This plot may help you to identify PCA dimensions, in which marker genes for cluster 12 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_13

**Figure, 56:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 13 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_13.V20201005.pdf).

Figure, 56: This plot may help you to identify PCA dimensions, in which marker genes for cluster 13 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_14

**Figure, 57:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 14 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_14.V20201005.pdf).

Figure, 57: This plot may help you to identify PCA dimensions, in which marker genes for cluster 14 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_15

**Figure, 58:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 15 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_15.V20201005.pdf).

Figure, 58: This plot may help you to identify PCA dimensions, in which marker genes for cluster 15 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_16

**Figure, 59:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 16 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_16.V20201005.pdf).

Figure, 59: This plot may help you to identify PCA dimensions, in which marker genes for cluster 16 become evident. Download a pdf of this figure here.

PCA_Distributions_Cluster_17

**Figure, 60:** This plot may help you to identify PCA dimensions, in which marker genes for cluster 17 become evident. Download a pdf of this figure [here](report_figures/PCA_Distributions_Cluster_17.V20201005.pdf).

Figure, 60: This plot may help you to identify PCA dimensions, in which marker genes for cluster 17 become evident. Download a pdf of this figure here.

Characterize PCA Dimensions

library(knitr)
library(ggplot2)

#save.image("temp.RData")


PCAdimensions <- paste0("PC_", 1:20)

plotList <- list()
chnkVec <- as.vector(NULL, mode = "character")

for (j in 1:length(PCAdimensions)){
    posTestGeneSet <- as.vector(
        unique(
            EnrichedGenesList[[paste0(PCAdimensions[j], "_pos")]]
        )
    )
    
    
    negTestGeneSet <- as.vector(
        unique(
            EnrichedGenesList[[paste0(PCAdimensions[j], "_neg")]]
        )
    )
    
    ## Get background gene set ##
    #backgroundGeneVec <- row.names(OsC[["RNA"]]@counts)
    if ((length(posTestGeneSet) >= 3) |(length(negTestGeneSet) >= 3)){
        library(enrichR)
        topMaxCat <- 10
        dbs <- listEnrichrDbs()
        
        dbs <- c("GO_Biological_Process_2017")
        
        
        PosEnriched <- enrichr(posTestGeneSet, dbs)
        
        for (i in 1:length(dbs)){
            dfTemp <- PosEnriched[[dbs[i]]]
            
            if (i ==1){
                dfPosEnriched <- dfTemp
            } else {
                dfPosEnriched <- rbind(
                    dfPosEnriched,
                    dfTemp
                )
            }
            
        }
        
        dfPosEnriched[["log10FDR"]] <- -1*log10(dfPosEnriched$Adjusted.P.value)
        dfPosEnriched <- dfPosEnriched[order(-dfPosEnriched$log10FDR),]
        dfPosEnriched <- na.omit(dfPosEnriched)
        
        ## Negative Side ##
        NegEnriched <- enrichr(negTestGeneSet, dbs)
        
        for (i in 1:length(dbs)){
            dfTemp <- NegEnriched[[dbs[i]]]
            
            if (i ==1){
                dfNegEnriched <- dfTemp
            } else {
                dfNegEnriched <- rbind(
                    dfNegEnriched,
                    dfTemp
                )
            }
            
        }
        
        
        dfNegEnriched[["log10FDR"]] <- -1*log10(dfNegEnriched$Adjusted.P.value)
        dfNegEnriched <- dfNegEnriched[order(-dfPosEnriched$log10FDR),]
        dfNegEnriched <- na.omit(dfNegEnriched)
        
        dfNegSel <- dfNegEnriched
        if (nrow(dfNegSel) > topMaxCat){
            dfNegSel <- dfNegSel[1:topMaxCat,]
        }
        
        dfPosSel <- dfPosEnriched
        if (nrow(dfPosSel) > topMaxCat){
            dfPosSel <- dfPosSel[1:topMaxCat,]
        }
        
        if ((nrow(dfNegEnriched) > 0) | (nrow(dfPosEnriched) > 0)){
            
            
            dfNegSel$log10FDR <- -1* dfNegSel$log10FDR
            
            dfSel <- rbind(
                dfNegSel,
                dfPosSel
            )
            
            dfSel <- na.omit(dfSel)
            dfSel <- dfSel[order(dfSel$log10FDR),]
            dfSel$log10FDR <- round(dfSel$log10FDR, 2)
            
            dfSel[["Category"]] <- ""
            dfSel[dfSel$log10FDR >= 0, "Category"] <- "Enr."
            dfSel[dfSel$log10FDR < 0, "Category"] <- "Depl."
            
            for (k in 1:nrow(dfSel)){
                if (nchar(dfSel[k, "Term"]) > 50 & length(grep("\\(GO", as.vector(dfSel[k, "Term"]))) > 0){
                    part1 <- unlist(strsplit(as.vector(dfSel[k, "Term"]), "\\(GO"))[1]
                    part1 <- substr(part1, 1, 45)
                    part2 <- unlist(strsplit(as.vector(dfSel[k, "Term"]), "\\(GO"))[2]
                    part2 <- paste0("\\(GO", part2)
                    
                    if (nchar(part1) > 40 ){
                        dfSel[k, "Term"] <- paste0(part1, " \\n", part2)
                    } else { 
                        dfSel[k, "Term"] <- paste0(part1, " ", part2)
                    }
                }
            }
            
            
            #dfSel$Term <- gsub("\\(GO", "\\\n\\(GO", dfSel$Term)
            
            dfSel$Term <- factor(dfSel$Term, levels = unique(dfSel$Term))
            
            plotList[[paste0("PCA_ENR_", j)]] <- ggplot(
                data=dfSel, aes(x= Term, y=log10FDR, fill=Category, order=log10FDR)
            ) + geom_bar(stat="identity", colour="black"
            ) + coord_flip() +scale_fill_manual(values=c("yellow", "blue"))  +  theme(
                axis.text.y   = element_text(size=8),
                axis.text.x   = element_text(size=8),
                axis.title.y  = element_text(size=8),
                axis.title.x  = element_text(size=8),
                axis.line = element_line(colour = "black"),
                panel.border = element_rect(colour = "black", fill=NA, size=1),
                plot.title = element_text(hjust = 0.5, size = 12)
            )  + labs(title = paste0("Cluster ", PCAdimensions[j]," enriched genes") ,y = "-log10(FDR)", x = ""
            ) + geom_hline(yintercept = c(-log10(0.05), log10(0.05)), color = "red", size=0.5, lty=2
            ) + geom_hline(yintercept = 0, color = "black", size=0.5
            )
            cat("  \n")
            
            
            
            ## Save to file ##
            FNbase <- paste0("PCA_Cluster_", PCAdimensions[j],".enriched.genes", VersionPdfExt)
            FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
            FNrel <- paste0("report_figures/", FNbase)
            
           
            pdf(FN)
            print(plotList[[paste0("PCA_ENR_", j)]])
            dev.off()
            
            link <- paste0(
                "https://", urlString, "/",
                Obio@parameterList$project_id,
                "/category-view?category_type=GO-BP"
            )
            
            ## Create R markdown chunk ##
            figLegend <- paste0(
                "**Figure ", 
                figureCount, 
                "**: GO-BP category enrichment analysis for the 15 genes that have  <font color = \\'yellow\\'> the most positive </font> and <font color = \\'blue\\'>the most negative</font> PCA loading values in dimension ", 
               PCAdimensions[j],
                " associated with them. Download a pdf of this figure [here](", FNrel, "). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](",link,") and find these categories using the search box."
            )
            figureCount <- figureCount + 1 
            
            NewChnk <- paste0(
                "#### ", PCAdimensions[j],
                "\n```{r enrichr_",
                j,", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
                figLegend,"'}\n",
                "\n",
                "\n print(plotList[['",paste0("PCA_ENR_", j),"']])",
                "\n cat(  '\n')",
                "\n\n\n```\n"   
            )
        }
    }
    chnkVec <- c(
        chnkVec,
        NewChnk
    )
}
###############################################################################
## Do category enrichment on clusters                                        ##
cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))

PC_1

**Figure 61**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_1 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_1.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 61: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_1 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_2

**Figure 62**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_2 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_2.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 62: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_2 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_3

**Figure 63**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_3 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_3.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 63: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_3 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_4

**Figure 64**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_4 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_4.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 64: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_4 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_5

**Figure 65**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_5 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_5.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 65: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_5 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_6

**Figure 66**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_6 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_6.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 66: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_6 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_7

**Figure 67**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_7 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_7.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 67: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_7 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_8

**Figure 68**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_8 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_8.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 68: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_8 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_9

**Figure 69**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_9 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_9.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 69: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_9 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_10

**Figure 70**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_10 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_10.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 70: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_10 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_11

**Figure 71**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_11 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_11.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 71: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_11 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_12

**Figure 72**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_12 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_12.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 72: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_12 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_13

**Figure 73**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_13 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_13.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 73: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_13 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_14

**Figure 74**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_14 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_14.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 74: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_14 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_15

**Figure 75**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_15 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_15.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 75: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_15 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_16

**Figure 76**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_16 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_16.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 76: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_16 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_17

**Figure 77**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_17 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_17.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 77: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_17 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_18

**Figure 78**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_18 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_18.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 78: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_18 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_19

**Figure 79**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_19 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_19.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 79: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_19 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

PC_20

**Figure 80**: GO-BP category enrichment analysis for the 15 genes that have  <font color = 'yellow'> the most positive </font> and <font color = 'blue'>the most negative</font> PCA loading values in dimension PC_20 associated with them. Download a pdf of this figure [here](report_figures/PCA_Cluster_PC_20.enriched.genes.V20201005.pdf). To view these gene sets in the context of your data, go to [CategoryView > GO-BP](https://biologic.crick.ac.uk/rll358A/category-view?category_type=GO-BP) and find these categories using the search box.

Figure 80: GO-BP category enrichment analysis for the 15 genes that have the most positive and the most negative PCA loading values in dimension PC_20 associated with them. Download a pdf of this figure here. To view these gene sets in the context of your data, go to CategoryView > GO-BP and find these categories using the search box.

## Done doing enrichment on clusters                                         ##
###############################################################################

Pseudotime

## Add default ##
pos <- grep("addDmaps", names(Obio@parameterList))
if (length(pos) == 0){
    Obio@parameterList[["addDmaps"]] <- FALSE
}

if (Obio@parameterList$addDmaps){
    library(knitr)
    library(ggplot2)
    invertPT <- TRUE
    
    
    plotList <- list()
    chnkVec <- as.vector(NULL, mode = "character")
    
    
    
    library(Seurat)
    library(destiny)
    
    dfPCA <- OsC@meta.data
    dfPCA <- dfPCA[,grep("PC_", names(dfPCA))]
    
    dmPCA <- DiffusionMap(dfPCA)
    
    #dpt <- DPT(dm, tips = 268)
    #dpt <- DPT(dm)
    dpt <- DPT(dmPCA)
    #pseudotime <- dpt$dpt
    
    # Plot DC1 vs DC2 and color the cells by their inferred diffusion pseudotime.
    # We can accesss diffusion pseudotime via dpt$dpt.
    df <- data.frame(
      DC1 = eigenvectors(dmPCA)[, 1], 
      DC2 = eigenvectors(dmPCA)[, 2], 
      DC3 = eigenvectors(dmPCA)[, 3], 
      "DM_Pseudotime" = dpt$dpt
    )
    
    df$cellID <- row.names(dfPCA)
    
    ## For this project reverse pseudotime ##
    if (invertPT){
        PTmax <- max(df$DM_Pseudotime)
        df$DM_Pseudotime <- -1* (df$DM_Pseudotime - PTmax)
        ## Invert DC1, 2, 3
        df$DC1 <- -1* df$DC1
        df$DC2 <- -1* df$DC2
        df$DC3 <- -1* df$DC3
    }
    
    ## Add to table ##
    df$cellID <- row.names(dfPCA)
    dfdbTable <- OsC@meta.data
    
    dfdbTable$row_names <- NULL
    dfdbTable$DC1 <- NULL
    dfdbTable$DC3 <- NULL
    dfdbTable$DC2 <- NULL
    dfdbTable$DM_Pseudotime <- NULL
    dfdbTable <- unique(dfdbTable)
    
    ###############################################################################
    ## Add pseudotime components                                                 ##
    #dim(dfdbTable)
    
    dfdbTable[["cellID"]] <- row.names(dfdbTable)
    
    dfdbTable <- merge(
      dfdbTable, 
      df, 
      by.x = "cellID",
      by.y = "cellID"
    )
    
    #dim(dfdbTable)
    
    ## Add to Seurat object ##
    OsC@meta.data <- dfdbTable
    
    ## Create Pseudotime plot ##
    dfTemp <- dfdbTable
    #dotsize <- 0.5
    dotcolor <- "darkblue"
    tag <- "PC1PC2all"
    
    plotList[[tag]] <- ggplot(dfTemp, aes(PC_1, PC_2, color=DM_Pseudotime)
        )+ geom_point( 
            shape = 16,
            size = as.numeric(dotsize)
        ) + xlab("PC1") + ylab("PC2") + scale_color_gradient2(
            low="#ff6600", 
            high=dotcolor #, 
            #limits=c(0,maxExpr)
        ) +  theme(
            axis.text.y   = element_text(size=8),
            axis.text.x   = element_text(size=8),
            axis.title.y  = element_text(size=8),
            axis.title.x  = element_text(size=8),
            axis.line = element_line(colour = "black"),
            panel.border = element_rect(colour = "black", fill=NA, size=1),
            plot.title = element_text(hjust = 0.5, size = 12),
            panel.background = element_rect(fill = "lightgrey")
        ) + ggtitle("PC1, PC2 and DM Pseudotime"
        ) #+ xlim(minX, maxX) + ylim(minY, maxY)  
    
    
    
            ## Save to file ##
            FNbase <- paste0("Pseudotime_overview", VersionPdfExt)
            FN <- paste0(Obio@parameterList$reportFigDir, FNbase)
            FNrel <- paste0("report_figures/", FNbase)
            
           
            pdf(FN)
            print(plotList[[tag]])
            dev.off()
            
            
            
            ## Create R markdown chunk ##
            figLegend <- paste0(
                "**Figure ", 
                figureCount, 
                "**: Figure depicting PCA components 1 and 2 with the diffusion map pseudotime highlighted in color. Download a pdf of this figure [here](", FNrel, "). "
            )
            figureCount <- figureCount + 1 
            
            NewChnk <- paste0(
                "\n#### Pseudotime All Timepoints", 
                "\n```{r ", tag, ", results='asis', echo=F, eval=TRUE, warning=FALSE, fig.cap='",
                figLegend,"'}\n",
                "\n",
                "\n print(plotList[['",tag,"']])",
                "\n cat(  '\n')",
                "\n\n\n```\n"   
            )
            
            chnkVec <- c(
                chnkVec,
                NewChnk
            )
} # end dmap
###############################################################################
## Do category enrichment on clusters                                        ##
if (Obio@parameterList$addDmaps){
    cat(paste(knit(text = chnkVec, quiet = T), collapse = '\n'))
}
## Done doing enrichment on clusters                                         ##
###############################################################################
### Will save Obio object here, so it can be re-used with different parameters
save(Obio, 
     file = paste0(
         Obio@parameterList$localWorkDir,
         Obio@parameterList$project_id,
         ".bioLOGIC.Robj"
     )
)

print("Obio Object saved.")

save(OsC,
    file = paste0(
         Obio@parameterList$localWorkDir,
         Obio@parameterList$project_id,
        ".Seurat.Robj"
     )
)

Documentation

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS:   /camp/apps/misc/stp/babs/manual/software/r/R-3.6.0-foss-2016b/lib64/R/lib/libRblas.so
## LAPACK: /camp/apps/misc/stp/babs/manual/software/r/R-3.6.0-foss-2016b/lib64/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_GB.utf-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.utf-8        LC_COLLATE=en_GB.utf-8    
##  [5] LC_MONETARY=en_GB.utf-8    LC_MESSAGES=en_GB.utf-8   
##  [7] LC_PAPER=en_GB.utf-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.utf-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] enrichR_2.1                 DT_0.14                    
##  [3] AUCell_1.6.1                sleepwalk_0.3.0            
##  [5] ggtree_1.16.6               scales_1.1.1               
##  [7] ggrepel_0.8.2               RMySQL_0.10.20             
##  [9] DBI_1.1.0                   DESeq2_1.24.0              
## [11] SummarizedExperiment_1.14.1 DelayedArray_0.10.0        
## [13] BiocParallel_1.18.1         matrixStats_0.56.0         
## [15] Biobase_2.44.0              GenomicRanges_1.36.1       
## [17] GenomeInfoDb_1.20.0         IRanges_2.18.3             
## [19] S4Vectors_0.22.1            BiocGenerics_0.30.0        
## [21] knitr_1.29                  forcats_0.5.0              
## [23] stringr_1.4.0               purrr_0.3.4                
## [25] readr_1.3.1                 tidyr_1.1.0                
## [27] tibble_3.0.1                tidyverse_1.3.0            
## [29] ggplot2_3.3.2               Seurat_3.1.5               
## [31] dplyr_1.0.0                
## 
## loaded via a namespace (and not attached):
##   [1] reticulate_1.16        R.utils_2.9.2          tidyselect_1.1.0      
##   [4] RSQLite_2.2.0          AnnotationDbi_1.46.1   htmlwidgets_1.5.1     
##   [7] grid_3.6.0             Rtsne_0.15             munsell_0.5.0         
##  [10] codetools_0.2-16       ica_1.0-2              jrc_0.3.1             
##  [13] future_1.17.0          withr_2.2.0            colorspace_1.4-1      
##  [16] highr_0.8              rstudioapi_0.11        ROCR_1.0-11           
##  [19] listenv_0.8.0          labeling_0.3           GenomeInfoDbData_1.2.1
##  [22] bit64_0.9-7            farver_2.0.3           vctrs_0.3.1           
##  [25] treeio_1.8.2           generics_0.0.2         xfun_0.15             
##  [28] R6_2.4.1               rsvd_1.0.3             locfit_1.5-9.4        
##  [31] bitops_1.0-6           assertthat_0.2.1       promises_1.1.1        
##  [34] nnet_7.3-12            gtable_0.3.0           globals_0.12.5        
##  [37] rlang_0.4.6            genefilter_1.66.0      splines_3.6.0         
##  [40] lazyeval_0.2.2         acepack_1.4.1          broom_0.5.6           
##  [43] checkmate_2.0.0        BiocManager_1.30.10    yaml_2.2.1            
##  [46] reshape2_1.4.4         modelr_0.1.8           crosstalk_1.1.0.1     
##  [49] backports_1.1.8        httpuv_1.5.4           Hmisc_4.4-0           
##  [52] tools_3.6.0            ellipsis_0.3.1         RColorBrewer_1.1-2    
##  [55] ggridges_0.5.2         Rcpp_1.0.4.6           plyr_1.8.6            
##  [58] base64enc_0.1-3        zlibbioc_1.30.0        RCurl_1.98-1.2        
##  [61] rpart_4.1-15           pbapply_1.4-2          cowplot_1.0.0         
##  [64] zoo_1.8-8              haven_2.3.1            cluster_2.0.8         
##  [67] fs_1.4.2               magrittr_1.5           data.table_1.12.8     
##  [70] RSpectra_0.16-0        lmtest_0.9-37          reprex_0.3.0          
##  [73] RANN_2.6.1             fitdistrplus_1.1-1     hms_0.5.3             
##  [76] patchwork_1.0.1        mime_0.9               evaluate_0.14         
##  [79] xtable_1.8-4           XML_3.99-0.3           jpeg_0.1-8.1          
##  [82] readxl_1.3.1           gridExtra_2.3          compiler_3.6.0        
##  [85] KernSmooth_2.23-15     crayon_1.3.4           R.oo_1.23.0           
##  [88] htmltools_0.5.0        segmented_1.2-0        later_1.1.0.1         
##  [91] Formula_1.2-3          geneplotter_1.62.0     lubridate_1.7.9       
##  [94] dbplyr_1.4.4           MASS_7.3-51.4          rappdirs_0.3.1        
##  [97] Matrix_1.2-18          cli_2.0.2              R.methodsS3_1.8.0     
## [100] igraph_1.2.5           pkgconfig_2.0.3        rvcheck_0.1.8         
## [103] foreign_0.8-71         plotly_4.9.2.1         xml2_1.3.2            
## [106] annotate_1.62.0        XVector_0.24.0         rvest_0.3.5           
## [109] digest_0.6.25          sctransform_0.2.1      RcppAnnoy_0.0.16      
## [112] tsne_0.1-3             graph_1.62.0           rmarkdown_2.3         
## [115] cellranger_1.1.0       leiden_0.3.3           tidytree_0.3.3        
## [118] htmlTable_2.0.0        uwot_0.1.8             GSEABase_1.46.0       
## [121] curl_4.3               kernlab_0.9-29         shiny_1.5.0           
## [124] rjson_0.2.20           lifecycle_0.2.0        nlme_3.1-139          
## [127] jsonlite_1.7.0         viridisLite_0.3.0      fansi_0.4.1           
## [130] pillar_1.4.4           lattice_0.20-38        fastmap_1.0.1         
## [133] httr_1.4.1             survival_3.2-3         glue_1.4.1            
## [136] png_0.1-7              bit_1.1-15.2           stringi_1.4.6         
## [139] mixtools_1.2.0         blob_1.2.1             latticeExtra_0.6-29   
## [142] memoise_1.1.0          irlba_2.3.3            future.apply_1.6.0    
## [145] ape_5.4

  1. The Francis Crick Institute, ↩︎